xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision bcbf2dc56252e54b6497528bb28e6efa46bc1a66)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
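/*
    A minimal usage sketch (hypothetical driver code, not part of this file), assuming
    gmat is a MATSEQAIJ matrix that is only meaningful on rank 0 and mlocal is the
    number of rows this rank should own:

      Mat dist;
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
      ...
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gmat,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);

    The MAT_REUSE_MATRIX call only moves new numerical values from rank 0 into the
    already-distributed matrix; the nonzero structure must be unchanged.
*/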
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscTruth     aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal nonzeros in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal nonzeros in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it, it is not scalable (each processor
181 has an order-N integer array, but access is fast).
182 */
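/*
  A small illustrative example (made-up values): if the off-diagonal block B has
  garray = {3, 7, 12}, i.e. its local columns 0, 1, 2 correspond to global columns
  3, 7 and 12, then this routine records colmap so that global column 7 maps back to
  local column 1.  The stored values are shifted by one (so colmap holds 2 for global
  column 7) and a result of colmap[col]-1 == -1 means "global column not present in B".
*/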
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
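/*
  The two macros below insert a single value into one row of a local SeqAIJ block
  (A = diagonal part, B = off-diagonal part).  They narrow the search range with a
  short bisection (while the range is larger than 5), finish with a linear scan, and
  either update an existing entry or shift the tail of the row up one slot to make
  room for a new nonzero, growing the storage through MatSeqXAIJReallocateAIJ() when
  the row is already full.
*/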
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
284 
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
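/*
  Rows owned by this process are inserted directly into the local blocks: columns in
  [cstart,cend) go into the diagonal block aij->A and all other columns go into the
  off-diagonal block aij->B (translated through colmap once the matrix has been
  assembled).  Rows owned by other processes are buffered in mat->stash and are sent
  to their owners during MatAssemblyBegin()/MatAssemblyEnd().
*/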
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscTruth     roworiented = aij->roworiented;
311 
312   /* Some variables required by the macros */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscTruth     ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (!aij->donotstash) {
394         if (roworiented) {
395           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
396         } else {
397           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
398         }
399       }
400     }
401   }
402   PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatGetValues_MPIAIJ"
407 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
408 {
409   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
410   PetscErrorCode ierr;
411   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
412   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
413 
414   PetscFunctionBegin;
415   for (i=0; i<m; i++) {
416     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
417     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
418     if (idxm[i] >= rstart && idxm[i] < rend) {
419       row = idxm[i] - rstart;
420       for (j=0; j<n; j++) {
421         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
422         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
423         if (idxn[j] >= cstart && idxn[j] < cend){
424           col = idxn[j] - cstart;
425           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
426         } else {
427           if (!aij->colmap) {
428             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
429           }
430 #if defined (PETSC_USE_CTABLE)
431           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
432           col --;
433 #else
434           col = aij->colmap[idxn[j]] - 1;
435 #endif
436           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
437           else {
438             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
439           }
440         }
441       }
442     } else {
443       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
444     }
445   }
446   PetscFunctionReturn(0);
447 }
448 
449 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
453 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
454 {
455   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
456   PetscErrorCode ierr;
457   PetscInt       nstash,reallocs;
458   InsertMode     addv;
459 
460   PetscFunctionBegin;
461   if (aij->donotstash) {
462     PetscFunctionReturn(0);
463   }
464 
465   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
466   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
467   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
468   mat->insertmode = addv; /* in case this processor had no cache */
469 
470   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
471   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
472   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
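/*
  Assembly drains the stash (values destined for locally owned rows that were set on
  other processes) through MatSetValues_MPIAIJ(), assembles the diagonal block, and
  then uses an allreduce to detect whether any process disassembled its off-diagonal
  block; if so, every process disassembles so that the subsequent reassembly is
  consistent.  Finally the off-diagonal block is assembled with compressed rows.
*/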
478 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
479 {
480   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
481   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
482   PetscErrorCode ierr;
483   PetscMPIInt    n;
484   PetscInt       i,j,rstart,ncols,flg;
485   PetscInt       *row,*col;
486   PetscTruth     other_disassembled;
487   PetscScalar    *val;
488   InsertMode     addv = mat->insertmode;
489 
490   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
491   PetscFunctionBegin;
492   if (!aij->donotstash) {
493     while (1) {
494       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
495       if (!flg) break;
496 
497       for (i=0; i<n;) {
498         /* Now identify the consecutive vals belonging to the same row */
499         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
500         if (j < n) ncols = j-i;
501         else       ncols = n-i;
502         /* Now assemble all these values with a single function call */
503         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
504         i = j;
505       }
506     }
507     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
508   }
509   a->compressedrow.use     = PETSC_FALSE;
510   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
511   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
512 
513   /* determine if any processor has disassembled; if so, we must
514      also disassemble ourselves so that we may reassemble. */
515   /*
516      if the nonzero structure of the submatrix B cannot change then we know that
517      no processor disassembled and thus we can skip this step
518   */
519   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
520     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
521     if (mat->was_assembled && !other_disassembled) {
522       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
523     }
524   }
525   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
526     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
527   }
528   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
529   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
530   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
531   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
532 
533   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
534   aij->rowvalues = 0;
535 
536   /* used by MatAXPY() */
537   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
538   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
539 
540   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
541   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
542   PetscFunctionReturn(0);
543 }
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
547 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
548 {
549   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
550   PetscErrorCode ierr;
551 
552   PetscFunctionBegin;
553   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
554   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 }
557 
558 #undef __FUNCT__
559 #define __FUNCT__ "MatZeroRows_MPIAIJ"
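/*
  Communication sketch: each process counts how many of the requested global rows
  belong to each owner, learns via PetscMaxSum() how many messages it will receive
  and their maximum length, posts MPI_Irecv()s, and ships the row indices with
  MPI_Isend().  The received indices are shifted to local numbering (lrows) and the
  local blocks are zeroed with MatZeroRows(); the diagonal entry is reinserted when
  diag is nonzero.
*/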
560 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
561 {
562   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
563   PetscErrorCode ierr;
564   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
565   PetscInt       i,*owners = A->rmap->range;
566   PetscInt       *nprocs,j,idx,nsends,row;
567   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
568   PetscInt       *rvalues,count,base,slen,*source;
569   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
570   MPI_Comm       comm = ((PetscObject)A)->comm;
571   MPI_Request    *send_waits,*recv_waits;
572   MPI_Status     recv_status,*send_status;
573 #if defined(PETSC_DEBUG)
574   PetscTruth     found = PETSC_FALSE;
575 #endif
576 
577   PetscFunctionBegin;
578   /* first count the number of rows being sent to each processor */
579   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
580   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
581   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
582   j = 0;
583   for (i=0; i<N; i++) {
584     if (lastidx > (idx = rows[i])) j = 0;
585     lastidx = idx;
586     for (; j<size; j++) {
587       if (idx >= owners[j] && idx < owners[j+1]) {
588         nprocs[2*j]++;
589         nprocs[2*j+1] = 1;
590         owner[i] = j;
591 #if defined(PETSC_DEBUG)
592         found = PETSC_TRUE;
593 #endif
594         break;
595       }
596     }
597 #if defined(PETSC_DEBUG)
598     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
599     found = PETSC_FALSE;
600 #endif
601   }
602   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
603 
604   /* inform other processors of the number of messages and the max length */
605   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
606 
607   /* post receives:   */
608   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
609   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
610   for (i=0; i<nrecvs; i++) {
611     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
612   }
613 
614   /* do sends:
615       1) starts[i] gives the starting index in svalues for stuff going to
616          the ith processor
617   */
618   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
619   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
620   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
621   starts[0] = 0;
622   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
623   for (i=0; i<N; i++) {
624     svalues[starts[owner[i]]++] = rows[i];
625   }
626 
627   starts[0] = 0;
628   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
629   count = 0;
630   for (i=0; i<size; i++) {
631     if (nprocs[2*i+1]) {
632       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
633     }
634   }
635   ierr = PetscFree(starts);CHKERRQ(ierr);
636 
637   base = owners[rank];
638 
639   /*  wait on receives */
640   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
641   count  = nrecvs; slen = 0;
642   while (count) {
643     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
644     /* unpack receives into our local space */
645     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
646     source[imdex]  = recv_status.MPI_SOURCE;
647     lens[imdex]    = n;
648     slen          += n;
649     count--;
650   }
651   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
652 
653   /* move the data into the send scatter */
654   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
655   count = 0;
656   for (i=0; i<nrecvs; i++) {
657     values = rvalues + i*nmax;
658     for (j=0; j<lens[i]; j++) {
659       lrows[count++] = values[j] - base;
660     }
661   }
662   ierr = PetscFree(rvalues);CHKERRQ(ierr);
663   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
664   ierr = PetscFree(owner);CHKERRQ(ierr);
665   ierr = PetscFree(nprocs);CHKERRQ(ierr);
666 
667   /* actually zap the local rows */
668   /*
669         Zero the required rows. If the "diagonal block" of the matrix
670      is square and the user wishes to set the diagonal, we use separate
671      code so that MatSetValues() is not called for each diagonal entry,
672      which would allocate new memory and slow things down with many mallocs.
673 
674   */
675   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
676   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
677   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
678     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
679   } else if (diag != 0.0) {
680     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
681     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
682       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
683 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
684     }
685     for (i = 0; i < slen; i++) {
686       row  = lrows[i] + rstart;
687       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
688     }
689     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
690     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
691   } else {
692     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
693   }
694   ierr = PetscFree(lrows);CHKERRQ(ierr);
695 
696   /* wait on sends */
697   if (nsends) {
698     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
699     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
700     ierr = PetscFree(send_status);CHKERRQ(ierr);
701   }
702   ierr = PetscFree(send_waits);CHKERRQ(ierr);
703   ierr = PetscFree(svalues);CHKERRQ(ierr);
704 
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatMult_MPIAIJ"
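/*
  y = A*x is formed as y = A_d*x_local + B_o*x_ghost, where A_d (a->A) holds the
  diagonal block and B_o (a->B) the off-diagonal block.  The scatter that gathers
  the ghost values of x into a->lvec is started before the local multiply and only
  completed when the off-diagonal product is needed, so communication overlaps with
  the local computation.
*/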
710 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
711 {
712   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
713   PetscErrorCode ierr;
714   PetscInt       nt;
715 
716   PetscFunctionBegin;
717   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
718   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
719   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
720   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
721   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
722   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
723   PetscFunctionReturn(0);
724 }
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
728 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
729 {
730   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
731   PetscErrorCode ierr;
732 
733   PetscFunctionBegin;
734   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatMultAdd_MPIAIJ"
740 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
741 {
742   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
743   PetscErrorCode ierr;
744 
745   PetscFunctionBegin;
746   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
747   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
748   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
749   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNCT__
754 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
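/*
  y = A'*x is formed as y = A_d'*x plus a scatter-add of B_o'*x: the off-diagonal
  contribution is computed into a->lvec and then added into the owning processes'
  entries of y with a reverse scatter.  The merged and non-merged branches below
  differ only in whether the local multiply can be overlapped with that scatter.
*/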
755 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
756 {
757   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
758   PetscErrorCode ierr;
759   PetscTruth     merged;
760 
761   PetscFunctionBegin;
762   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
763   /* do nondiagonal part */
764   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
765   if (!merged) {
766     /* send it on its way */
767     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
768     /* do local part */
769     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
770     /* receive remote parts: note this assumes the values are not actually */
771     /* added into yy until the VecScatterEnd() on the next line */
772     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
773   } else {
774     /* do local part */
775     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
776     /* send it on its way */
777     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
778     /* values were actually received in the Begin() but we still need to call this no-op */
779     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
780   }
781   PetscFunctionReturn(0);
782 }
783 
784 EXTERN_C_BEGIN
785 #undef __FUNCT__
786 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
787 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
788 {
789   MPI_Comm       comm;
790   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
791   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
792   IS             Me,Notme;
793   PetscErrorCode ierr;
794   PetscInt       M,N,first,last,*notme,i;
795   PetscMPIInt    size;
796 
797   PetscFunctionBegin;
798 
799   /* Easy test: the diagonal blocks must be transposes of each other */
800   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
801   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
802   if (!*f) PetscFunctionReturn(0);
803   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
804   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
805   if (size == 1) PetscFunctionReturn(0);
806 
807   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
808   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
809   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
810   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
811   for (i=0; i<first; i++) notme[i] = i;
812   for (i=last; i<M; i++) notme[i-last+first] = i;
813   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
814   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
815   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
816   Aoff = Aoffs[0];
817   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
818   Boff = Boffs[0];
819   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
820   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
821   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
822   ierr = ISDestroy(Me);CHKERRQ(ierr);
823   ierr = ISDestroy(Notme);CHKERRQ(ierr);
824 
825   PetscFunctionReturn(0);
826 }
827 EXTERN_C_END
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
831 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
832 {
833   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
834   PetscErrorCode ierr;
835 
836   PetscFunctionBegin;
837   /* do nondiagonal part */
838   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
839   /* send it on its way */
840   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
841   /* do local part */
842   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
843   /* receive remote parts */
844   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 /*
849   This only works correctly for square matrices where the subblock A->A is the
850    diagonal block
851 */
852 #undef __FUNCT__
853 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
854 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
855 {
856   PetscErrorCode ierr;
857   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
858 
859   PetscFunctionBegin;
860   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
861   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
862   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatScale_MPIAIJ"
868 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
869 {
870   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
871   PetscErrorCode ierr;
872 
873   PetscFunctionBegin;
874   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
875   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
876   PetscFunctionReturn(0);
877 }
878 
879 #undef __FUNCT__
880 #define __FUNCT__ "MatDestroy_MPIAIJ"
881 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
882 {
883   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
884   PetscErrorCode ierr;
885 
886   PetscFunctionBegin;
887 #if defined(PETSC_USE_LOG)
888   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
889 #endif
890   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
891   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
892   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
893   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
894 #if defined (PETSC_USE_CTABLE)
895   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
896 #else
897   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
898 #endif
899   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
900   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
901   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
902   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
903   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
904   ierr = PetscFree(aij);CHKERRQ(ierr);
905 
906   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
907   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
908   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
909   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
910   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
911   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
912   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
913   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
915   PetscFunctionReturn(0);
916 }
917 
918 #undef __FUNCT__
919 #define __FUNCT__ "MatView_MPIAIJ_Binary"
920 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
921 {
922   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
923   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
924   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
925   PetscErrorCode    ierr;
926   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
927   int               fd;
928   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
929   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
930   PetscScalar       *column_values;
931 
932   PetscFunctionBegin;
933   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
934   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
935   nz   = A->nz + B->nz;
936   if (!rank) {
937     header[0] = MAT_FILE_CLASSID;
938     header[1] = mat->rmap->N;
939     header[2] = mat->cmap->N;
940     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
941     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
942     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
943     /* get largest number of rows any processor has */
944     rlen = mat->rmap->n;
945     range = mat->rmap->range;
946     for (i=1; i<size; i++) {
947       rlen = PetscMax(rlen,range[i+1] - range[i]);
948     }
949   } else {
950     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
951     rlen = mat->rmap->n;
952   }
953 
954   /* load up the local row counts */
955   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
956   for (i=0; i<mat->rmap->n; i++) {
957     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
958   }
959 
960   /* store the row lengths to the file */
961   if (!rank) {
962     MPI_Status status;
963     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
964     for (i=1; i<size; i++) {
965       rlen = range[i+1] - range[i];
966       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
967       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
968     }
969   } else {
970     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
971   }
972   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
973 
974   /* load up the local column indices */
975   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
976   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
977   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
978   cnt  = 0;
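  /*
     For each local row the column indices are written in increasing global order:
     first the off-diagonal (B) entries whose global column lies to the left of this
     process's diagonal block, then the diagonal (A) entries shifted by cstart, then
     the remaining off-diagonal entries.
  */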
979   for (i=0; i<mat->rmap->n; i++) {
980     for (j=B->i[i]; j<B->i[i+1]; j++) {
981       if ( (col = garray[B->j[j]]) > cstart) break;
982       column_indices[cnt++] = col;
983     }
984     for (k=A->i[i]; k<A->i[i+1]; k++) {
985       column_indices[cnt++] = A->j[k] + cstart;
986     }
987     for (; j<B->i[i+1]; j++) {
988       column_indices[cnt++] = garray[B->j[j]];
989     }
990   }
991   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
992 
993   /* store the column indices to the file */
994   if (!rank) {
995     MPI_Status status;
996     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
997     for (i=1; i<size; i++) {
998       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
999       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1000       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1001       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1002     }
1003   } else {
1004     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1005     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1006   }
1007   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1008 
1009   /* load up the local column values */
1010   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1011   cnt  = 0;
1012   for (i=0; i<mat->rmap->n; i++) {
1013     for (j=B->i[i]; j<B->i[i+1]; j++) {
1014       if ( garray[B->j[j]] > cstart) break;
1015       column_values[cnt++] = B->a[j];
1016     }
1017     for (k=A->i[i]; k<A->i[i+1]; k++) {
1018       column_values[cnt++] = A->a[k];
1019     }
1020     for (; j<B->i[i+1]; j++) {
1021       column_values[cnt++] = B->a[j];
1022     }
1023   }
1024   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1025 
1026   /* store the column values to the file */
1027   if (!rank) {
1028     MPI_Status status;
1029     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1030     for (i=1; i<size; i++) {
1031       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1032       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1033       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1034       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1035     }
1036   } else {
1037     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1038     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1039   }
1040   ierr = PetscFree(column_values);CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1046 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1047 {
1048   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1049   PetscErrorCode    ierr;
1050   PetscMPIInt       rank = aij->rank,size = aij->size;
1051   PetscTruth        isdraw,iascii,isbinary;
1052   PetscViewer       sviewer;
1053   PetscViewerFormat format;
1054 
1055   PetscFunctionBegin;
1056   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1057   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1058   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1059   if (iascii) {
1060     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1061     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1062       MatInfo    info;
1063       PetscTruth inodes;
1064 
1065       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1066       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1067       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1068       if (!inodes) {
1069         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1070 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1071       } else {
1072         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1073 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1074       }
1075       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1076       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1077       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1078       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1079       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1080       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1081       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1082       PetscFunctionReturn(0);
1083     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1084       PetscInt   inodecount,inodelimit,*inodes;
1085       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1086       if (inodes) {
1087         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1088       } else {
1089         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1090       }
1091       PetscFunctionReturn(0);
1092     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1093       PetscFunctionReturn(0);
1094     }
1095   } else if (isbinary) {
1096     if (size == 1) {
1097       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1098       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1099     } else {
1100       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1101     }
1102     PetscFunctionReturn(0);
1103   } else if (isdraw) {
1104     PetscDraw  draw;
1105     PetscTruth isnull;
1106     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1107     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1108   }
1109 
1110   if (size == 1) {
1111     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1112     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1113   } else {
1114     /* assemble the entire matrix onto first processor. */
1115     Mat         A;
1116     Mat_SeqAIJ  *Aloc;
1117     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1118     MatScalar   *a;
1119 
1120     if (mat->rmap->N > 1024) {
1121       PetscTruth flg = PETSC_FALSE;
1122 
1123       ierr = PetscOptionsGetTruth(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1124       if (!flg) {
1125         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1126       }
1127     }
1128 
1129     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1130     if (!rank) {
1131       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1132     } else {
1133       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1134     }
1135     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1136     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1137     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1138     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1139 
1140     /* copy over the A part */
1141     Aloc = (Mat_SeqAIJ*)aij->A->data;
1142     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1143     row = mat->rmap->rstart;
1144     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1145     for (i=0; i<m; i++) {
1146       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1147       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1148     }
1149     aj = Aloc->j;
1150     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1151 
1152     /* copy over the B part */
1153     Aloc = (Mat_SeqAIJ*)aij->B->data;
1154     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1155     row  = mat->rmap->rstart;
1156     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1157     ct   = cols;
1158     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1159     for (i=0; i<m; i++) {
1160       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1161       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1162     }
1163     ierr = PetscFree(ct);CHKERRQ(ierr);
1164     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1165     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1166     /*
1167        Everyone has to participate in the call that draws the matrix since the graphics waits are
1168        synchronized across all processors that share the PetscDraw object
1169     */
1170     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1171     if (!rank) {
1172       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1173       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1174     }
1175     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1176     ierr = MatDestroy(A);CHKERRQ(ierr);
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatView_MPIAIJ"
1183 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1184 {
1185   PetscErrorCode ierr;
1186   PetscTruth     iascii,isdraw,issocket,isbinary;
1187 
1188   PetscFunctionBegin;
1189   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1190   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1191   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1192   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1193   if (iascii || isdraw || isbinary || issocket) {
1194     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1195   } else {
1196     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1197   }
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSOR_MPIAIJ"
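/*
  There is no true parallel SOR here: the "local" variants below amount to block
  Jacobi with SOR applied to the diagonal block of each process.  Every outer
  iteration scatters the current ghost values of xx into mat->lvec, forms the
  modified right-hand side bb1 = bb - B_o*x_ghost, and runs the requested local
  sweep(s) on the diagonal block; the SOR_EISENSTAT branch applies the Eisenstat
  trick using the stored diagonal.
*/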
1203 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1204 {
1205   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1206   PetscErrorCode ierr;
1207   Vec            bb1 = 0;
1208   PetscTruth     hasop;
1209 
1210   PetscFunctionBegin;
1211   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1212     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1213   }
1214 
1215   if (flag == SOR_APPLY_UPPER) {
1216     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1217     PetscFunctionReturn(0);
1218   }
1219 
1220   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1221     if (flag & SOR_ZERO_INITIAL_GUESS) {
1222       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1223       its--;
1224     }
1225 
1226     while (its--) {
1227       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1228       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1229 
1230       /* update rhs: bb1 = bb - B*x */
1231       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1232       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1233 
1234       /* local sweep */
1235       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1236     }
1237   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1238     if (flag & SOR_ZERO_INITIAL_GUESS) {
1239       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1240       its--;
1241     }
1242     while (its--) {
1243       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1245 
1246       /* update rhs: bb1 = bb - B*x */
1247       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1248       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1249 
1250       /* local sweep */
1251       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1252     }
1253   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1254     if (flag & SOR_ZERO_INITIAL_GUESS) {
1255       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1256       its--;
1257     }
1258     while (its--) {
1259       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1260       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1261 
1262       /* update rhs: bb1 = bb - B*x */
1263       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1264       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1265 
1266       /* local sweep */
1267       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1268     }
1269   }  else if (flag & SOR_EISENSTAT) {
1270     Vec         xx1;
1271 
1272     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1273     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1274 
1275     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1277     if (!mat->diag) {
1278       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1279       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1280     }
1281     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1282     if (hasop) {
1283       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1284     } else {
1285       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1286     }
1287     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1288 
1289     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1290 
1291     /* local sweep */
1292     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1293     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1294     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1295   } else {
1296     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1297   }
1298 
1299   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatPermute_MPIAIJ"
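/*
    Permutes rows and columns of an MPIAIJ matrix. The (possibly sequential) row
    permutation 'rowp' is gathered onto the matrix communicator, inverted, and the
    local slice of the inverse is used as the row index set; the column permutation
    is made local and inverted. The permuted matrix is then extracted with
    MatGetSubMatrix_MPIAIJ_Private().
*/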
1305 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1306 {
1307   MPI_Comm       comm,pcomm;
1308   PetscInt       first,local_size,nrows;
1309   const PetscInt *rows;
1310   PetscMPIInt    size;
1311   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1316   /* make a collective version of 'rowp' */
1317   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1318   if (pcomm==comm) {
1319     crowp = rowp;
1320   } else {
1321     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1322     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1323     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1324     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1325   }
1326   /* collect the global row permutation and invert it */
1327   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1328   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1329   if (pcomm!=comm) {
1330     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1331   }
1332   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1333   /* get the local target indices */
1334   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1335   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1336   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1337   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
1338   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1339   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1340   /* the column permutation is simpler:
1341      make a local version of 'colp' and invert it */
1342   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1343   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1344   if (size==1) {
1345     lcolp = colp;
1346   } else {
1347     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1348     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1349     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
1350   }
1351   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1352   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1353   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1354   if (size>1) {
1355     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1356     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1357   }
1358   /* now we just get the submatrix */
1359   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1360   /* clean up */
1361   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1362   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetInfo_MPIAIJ"
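/*
    Collects matrix information (nonzeros used/allocated/unneeded, memory, mallocs) by
    summing the data of the diagonal (A) and off-diagonal (B) blocks and, for
    MAT_GLOBAL_MAX or MAT_GLOBAL_SUM, reducing over the communicator with MPI_MAX or
    MPI_SUM. No factorization information is available since there is no parallel
    LU/ILU/Cholesky for this type.
*/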
1368 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369 {
1370   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1371   Mat            A = mat->A,B = mat->B;
1372   PetscErrorCode ierr;
1373   PetscReal      isend[5],irecv[5];
1374 
1375   PetscFunctionBegin;
1376   info->block_size     = 1.0;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   }
1404   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1405   info->fill_ratio_needed = 0;
1406   info->factor_mallocs    = 0;
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatSetOption_MPIAIJ"
1413 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1414 {
1415   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   switch (op) {
1420   case MAT_NEW_NONZERO_LOCATIONS:
1421   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1422   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1423   case MAT_KEEP_NONZERO_PATTERN:
1424   case MAT_NEW_NONZERO_LOCATION_ERR:
1425   case MAT_USE_INODES:
1426   case MAT_IGNORE_ZERO_ENTRIES:
1427     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1428     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1429     break;
1430   case MAT_ROW_ORIENTED:
1431     a->roworiented = flg;
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1434     break;
1435   case MAT_NEW_DIAGONALS:
1436     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1437     break;
1438   case MAT_IGNORE_OFF_PROC_ENTRIES:
1439     a->donotstash = PETSC_TRUE;
1440     break;
1441   case MAT_SPD:
1442     A->spd_set                         = PETSC_TRUE;
1443     A->spd                             = flg;
1444     if (flg) {
1445       A->symmetric                     = PETSC_TRUE;
1446       A->structurally_symmetric        = PETSC_TRUE;
1447       A->symmetric_set                 = PETSC_TRUE;
1448       A->structurally_symmetric_set    = PETSC_TRUE;
1449     }
1450     break;
1451   case MAT_SYMMETRIC:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     break;
1454   case MAT_STRUCTURALLY_SYMMETRIC:
1455     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1456     break;
1457   case MAT_HERMITIAN:
1458     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1459     break;
1460   case MAT_SYMMETRY_ETERNAL:
1461     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1462     break;
1463   default:
1464     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1465   }
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatGetRow_MPIAIJ"
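/*
    Returns one locally owned row in global numbering by merging the row of the
    diagonal block A (local column indices shifted by cstart) with the row of the
    off-diagonal block B (local column indices mapped to global ones through garray).
    Both blocks store their columns sorted, so the merged row comes out in increasing
    global column order. Work space for the longest local row is allocated lazily.
*/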
1471 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1472 {
1473   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1474   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1475   PetscErrorCode ierr;
1476   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1477   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1478   PetscInt       *cmap,*idx_p;
1479 
1480   PetscFunctionBegin;
1481   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1482   mat->getrowactive = PETSC_TRUE;
1483 
1484   if (!mat->rowvalues && (idx || v)) {
1485     /*
1486         allocate enough space to hold information from the longest row.
1487     */
1488     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1489     PetscInt   max = 1,tmp;
1490     for (i=0; i<matin->rmap->n; i++) {
1491       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1492       if (max < tmp) { max = tmp; }
1493     }
1494     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1495   }
1496 
1497   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1498   lrow = row - rstart;
1499 
1500   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1501   if (!v)   {pvA = 0; pvB = 0;}
1502   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1503   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1504   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1505   nztot = nzA + nzB;
1506 
1507   cmap  = mat->garray;
1508   if (v  || idx) {
1509     if (nztot) {
1510       /* Sort by increasing column numbers, assuming A and B already sorted */
1511       PetscInt imark = -1;
1512       if (v) {
1513         *v = v_p = mat->rowvalues;
1514         for (i=0; i<nzB; i++) {
1515           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1516           else break;
1517         }
1518         imark = i;
1519         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1520         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1521       }
1522       if (idx) {
1523         *idx = idx_p = mat->rowindices;
1524         if (imark > -1) {
1525           for (i=0; i<imark; i++) {
1526             idx_p[i] = cmap[cworkB[i]];
1527           }
1528         } else {
1529           for (i=0; i<nzB; i++) {
1530             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1531             else break;
1532           }
1533           imark = i;
1534         }
1535         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1536         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1537       }
1538     } else {
1539       if (idx) *idx = 0;
1540       if (v)   *v   = 0;
1541     }
1542   }
1543   *nz = nztot;
1544   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1545   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1551 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1552 {
1553   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1554 
1555   PetscFunctionBegin;
1556   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1557   aij->getrowactive = PETSC_FALSE;
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatNorm_MPIAIJ"
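/*
    Matrix norms for MPIAIJ:
      NORM_FROBENIUS - sqrt(sum_ij |a_ij|^2); local sums of squares reduced with MPI_SUM
      NORM_1         - max_j sum_i |a_ij|; column sums accumulated in a global-length
                       work array and reduced with MPI_SUM
      NORM_INFINITY  - max_i sum_j |a_ij|; local row sums reduced with MPI_MAX
    The 2-norm is not supported.
*/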
1563 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1564 {
1565   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1566   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1567   PetscErrorCode ierr;
1568   PetscInt       i,j,cstart = mat->cmap->rstart;
1569   PetscReal      sum = 0.0;
1570   MatScalar      *v;
1571 
1572   PetscFunctionBegin;
1573   if (aij->size == 1) {
1574     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1575   } else {
1576     if (type == NORM_FROBENIUS) {
1577       v = amat->a;
1578       for (i=0; i<amat->nz; i++) {
1579 #if defined(PETSC_USE_COMPLEX)
1580         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1581 #else
1582         sum += (*v)*(*v); v++;
1583 #endif
1584       }
1585       v = bmat->a;
1586       for (i=0; i<bmat->nz; i++) {
1587 #if defined(PETSC_USE_COMPLEX)
1588         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1589 #else
1590         sum += (*v)*(*v); v++;
1591 #endif
1592       }
1593       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1594       *norm = sqrt(*norm);
1595     } else if (type == NORM_1) { /* max column norm */
1596       PetscReal *tmp,*tmp2;
1597       PetscInt  *jj,*garray = aij->garray;
1598       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1599       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1600       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1601       *norm = 0.0;
1602       v = amat->a; jj = amat->j;
1603       for (j=0; j<amat->nz; j++) {
1604         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1605       }
1606       v = bmat->a; jj = bmat->j;
1607       for (j=0; j<bmat->nz; j++) {
1608         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1609       }
1610       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1611       for (j=0; j<mat->cmap->N; j++) {
1612         if (tmp2[j] > *norm) *norm = tmp2[j];
1613       }
1614       ierr = PetscFree(tmp);CHKERRQ(ierr);
1615       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1616     } else if (type == NORM_INFINITY) { /* max row norm */
1617       PetscReal ntemp = 0.0;
1618       for (j=0; j<aij->A->rmap->n; j++) {
1619         v = amat->a + amat->i[j];
1620         sum = 0.0;
1621         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1622           sum += PetscAbsScalar(*v); v++;
1623         }
1624         v = bmat->a + bmat->i[j];
1625         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1626           sum += PetscAbsScalar(*v); v++;
1627         }
1628         if (sum > ntemp) ntemp = sum;
1629       }
1630       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1631     } else {
1632       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1633     }
1634   }
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatTranspose_MPIAIJ"
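/*
    Transposes an MPIAIJ matrix. For a new matrix the nonzeros per local column of the
    diagonal block are counted to preallocate the result (the off-diagonal part is
    approximated by the same counts to avoid communication). Each local row of A and B
    is then inserted as a column of the result with MatSetValues() using global
    indices, so off-process entries are routed through the usual stash mechanism.
*/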
1640 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1641 {
1642   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1643   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1644   PetscErrorCode ierr;
1645   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1646   PetscInt       cstart=A->cmap->rstart,ncol;
1647   Mat            B;
1648   MatScalar      *array;
1649 
1650   PetscFunctionBegin;
1651   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1652 
1653   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1654   ai = Aloc->i; aj = Aloc->j;
1655   bi = Bloc->i; bj = Bloc->j;
1656   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1657     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1658     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1659     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1660     for (i=0; i<ai[ma]; i++){
1661       d_nnz[aj[i]] ++;
1662       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1663     }
1664 
1665     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1666     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1667     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1668     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1669     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1670   } else {
1671     B = *matout;
1672   }
1673 
1674   /* copy over the A part */
1675   array = Aloc->a;
1676   row = A->rmap->rstart;
1677   for (i=0; i<ma; i++) {
1678     ncol = ai[i+1]-ai[i];
1679     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1680     row++; array += ncol; aj += ncol;
1681   }
1682   aj = Aloc->j;
1683   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
1684 
1685   /* copy over the B part */
1686   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1687   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1688   array = Bloc->a;
1689   row = A->rmap->rstart;
1690   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1691   cols_tmp = cols;
1692   for (i=0; i<mb; i++) {
1693     ncol = bi[i+1]-bi[i];
1694     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1695     row++; array += ncol; cols_tmp += ncol;
1696   }
1697   ierr = PetscFree(cols);CHKERRQ(ierr);
1698 
1699   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1700   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1702     *matout = B;
1703   } else {
1704     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1705   }
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
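/*
    Computes mat = diag(ll)*mat*diag(rr). The scatter of rr into lvec is started first
    so that the left scaling of B and the scaling of the diagonal block A overlap with
    the communication; the right scaling of B is done once the scatter has completed.
*/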
1711 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1712 {
1713   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1714   Mat            a = aij->A,b = aij->B;
1715   PetscErrorCode ierr;
1716   PetscInt       s1,s2,s3;
1717 
1718   PetscFunctionBegin;
1719   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1720   if (rr) {
1721     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1722     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1723     /* Overlap communication with computation. */
1724     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1725   }
1726   if (ll) {
1727     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1728     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1729     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1730   }
1731   /* scale the diagonal block */
1732   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1733 
1734   if (rr) {
1735     /* Do a scatter end and then right scale the off-diagonal block */
1736     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1737     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1738   }
1739 
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1745 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1746 {
1747   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1748   PetscErrorCode ierr;
1749 
1750   PetscFunctionBegin;
1751   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1752   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1753   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1754   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1755   PetscFunctionReturn(0);
1756 }
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1759 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1760 {
1761   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1762   PetscErrorCode ierr;
1763 
1764   PetscFunctionBegin;
1765   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 
1769 #undef __FUNCT__
1770 #define __FUNCT__ "MatEqual_MPIAIJ"
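/*
    Two MPIAIJ matrices are equal if and only if their diagonal and off-diagonal
    blocks are equal on every process; the local results are combined with a logical
    AND reduction over the communicator.
*/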
1771 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1772 {
1773   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1774   Mat            a,b,c,d;
1775   PetscTruth     flg;
1776   PetscErrorCode ierr;
1777 
1778   PetscFunctionBegin;
1779   a = matA->A; b = matA->B;
1780   c = matB->A; d = matB->B;
1781 
1782   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1783   if (flg) {
1784     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1785   }
1786   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatCopy_MPIAIJ"
1792 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1793 {
1794   PetscErrorCode ierr;
1795   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1796   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1797 
1798   PetscFunctionBegin;
1799   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1800   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1801     /* because of the column compression in the off-processor part of the matrix a->B,
1802        the number of columns in a->B and b->B may be different, hence we cannot call
1803        the MatCopy() directly on the two parts. If need be, we can provide a more
1804        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1805        then copying the submatrices */
1806     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1807   } else {
1808     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1809     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1810   }
1811   PetscFunctionReturn(0);
1812 }
1813 
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1816 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1817 {
1818   PetscErrorCode ierr;
1819 
1820   PetscFunctionBegin;
1821   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatAXPY_MPIAIJ"
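/*
    Computes Y = a*X + Y. With SAME_NONZERO_PATTERN the value arrays of the A and B
    blocks are combined directly with BLAS axpy. With SUBSET_NONZERO_PATTERN an index
    map xtoy from X's nonzeros into Y's nonzeros is built (and cached on Y) for the
    off-diagonal block. Otherwise the generic MatAXPY_Basic() is used.
*/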
1827 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1828 {
1829   PetscErrorCode ierr;
1830   PetscInt       i;
1831   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1832   PetscBLASInt   bnz,one=1;
1833   Mat_SeqAIJ     *x,*y;
1834 
1835   PetscFunctionBegin;
1836   if (str == SAME_NONZERO_PATTERN) {
1837     PetscScalar alpha = a;
1838     x = (Mat_SeqAIJ *)xx->A->data;
1839     y = (Mat_SeqAIJ *)yy->A->data;
1840     bnz = PetscBLASIntCast(x->nz);
1841     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1842     x = (Mat_SeqAIJ *)xx->B->data;
1843     y = (Mat_SeqAIJ *)yy->B->data;
1844     bnz = PetscBLASIntCast(x->nz);
1845     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1846   } else if (str == SUBSET_NONZERO_PATTERN) {
1847     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1848 
1849     x = (Mat_SeqAIJ *)xx->B->data;
1850     y = (Mat_SeqAIJ *)yy->B->data;
1851     if (y->xtoy && y->XtoY != xx->B) {
1852       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1853       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1854     }
1855     if (!y->xtoy) { /* get xtoy */
1856       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1857       y->XtoY = xx->B;
1858       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1859     }
1860     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1861   } else {
1862     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1863   }
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatConjugate_MPIAIJ"
1871 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1872 {
1873 #if defined(PETSC_USE_COMPLEX)
1874   PetscErrorCode ierr;
1875   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1876 
1877   PetscFunctionBegin;
1878   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1879   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1880 #else
1881   PetscFunctionBegin;
1882 #endif
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatRealPart_MPIAIJ"
1888 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1889 {
1890   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1891   PetscErrorCode ierr;
1892 
1893   PetscFunctionBegin;
1894   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1895   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1901 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1902 {
1903   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1908   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #ifdef PETSC_HAVE_PBGL
1913 
1914 #include <boost/parallel/mpi/bsp_process_group.hpp>
1915 #include <boost/graph/distributed/ilu_default_graph.hpp>
1916 #include <boost/graph/distributed/ilu_0_block.hpp>
1917 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1918 #include <boost/graph/distributed/petsc/interface.hpp>
1919 #include <boost/multi_array.hpp>
1920 #include <boost/parallel/distributed_property_map.hpp>
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1924 /*
1925   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1926 */
1927 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1928 {
1929   namespace petsc = boost::distributed::petsc;
1930 
1931   namespace graph_dist = boost::graph::distributed;
1932   using boost::graph::distributed::ilu_default::process_group_type;
1933   using boost::graph::ilu_permuted;
1934 
1935   PetscTruth      row_identity, col_identity;
1936   PetscContainer  c;
1937   PetscInt        m, n, M, N;
1938   PetscErrorCode  ierr;
1939 
1940   PetscFunctionBegin;
1941   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1942   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1943   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1944   if (!row_identity || !col_identity) {
1945     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1946   }
1947 
1948   process_group_type pg;
1949   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1950   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1951   lgraph_type&   level_graph = *lgraph_p;
1952   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1953 
1954   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1955   ilu_permuted(level_graph);
1956 
1957   /* put together the new matrix */
1958   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1959   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1960   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1961   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
1962   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1963   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1965 
1966   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1967   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1968   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1974 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
1975 {
1976   PetscFunctionBegin;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatSolve_MPIAIJ"
1982 /*
1983   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1984 */
1985 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1986 {
1987   namespace graph_dist = boost::graph::distributed;
1988 
1989   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1990   lgraph_type*   lgraph_p;
1991   PetscContainer c;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1996   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1997   ierr = VecCopy(b, x);CHKERRQ(ierr);
1998 
1999   PetscScalar* array_x;
2000   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2001   PetscInt sx;
2002   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2003 
2004   PetscScalar* array_b;
2005   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2006   PetscInt sb;
2007   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2008 
2009   lgraph_type&   level_graph = *lgraph_p;
2010   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2011 
2012   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2013   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2014                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2015 
2016   typedef boost::iterator_property_map<array_ref_type::iterator,
2017                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2018   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2019                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2020 
2021   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2022 
2023   PetscFunctionReturn(0);
2024 }
2025 #endif
2026 
2027 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2028   PetscInt       nzlocal,nsends,nrecvs;
2029   PetscMPIInt    *send_rank,*recv_rank;
2030   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2031   PetscScalar    *sbuf_a,**rbuf_a;
2032   PetscErrorCode (*MatDestroy)(Mat);
2033 } Mat_Redundant;
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2037 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2038 {
2039   PetscErrorCode       ierr;
2040   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2041   PetscInt             i;
2042 
2043   PetscFunctionBegin;
2044   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2045   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2046   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2047   for (i=0; i<redund->nrecvs; i++){
2048     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2049     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2050   }
2051   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2052   ierr = PetscFree(redund);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatDestroy_MatRedundant"
2058 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2059 {
2060   PetscErrorCode  ierr;
2061   PetscContainer  container;
2062   Mat_Redundant   *redund=PETSC_NULL;
2063 
2064   PetscFunctionBegin;
2065   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2066   if (container) {
2067     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2068   } else {
2069     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2070   }
2071   A->ops->destroy = redund->MatDestroy;
2072   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2073   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2074   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
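/*
    Builds, on each subcommunicator, a complete (redundant) copy of the parallel
    matrix. Every process packs its local rows (row pointers, column indices and
    values) into sbuf_j/sbuf_a, exchanges them with the processes of the other
    subcommunicators using nonblocking sends/receives, and inserts both its own and
    the received rows into C. With MAT_INITIAL_MATRIX the communication pattern and
    the buffers are stored in a Mat_Redundant container attached to C, so a later call
    with MAT_REUSE_MATRIX only needs to resend the numerical values.
*/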
2080 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2081 {
2082   PetscMPIInt    rank,size;
2083   MPI_Comm       comm=((PetscObject)mat)->comm;
2084   PetscErrorCode ierr;
2085   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2086   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2087   PetscInt       *rowrange=mat->rmap->range;
2088   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2089   Mat            A=aij->A,B=aij->B,C=*matredundant;
2090   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2091   PetscScalar    *sbuf_a;
2092   PetscInt       nzlocal=a->nz+b->nz;
2093   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2094   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2095   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2096   MatScalar      *aworkA,*aworkB;
2097   PetscScalar    *vals;
2098   PetscMPIInt    tag1,tag2,tag3,imdex;
2099   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2100                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2101   MPI_Status     recv_status,*send_status;
2102   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2103   PetscInt       **rbuf_j=PETSC_NULL;
2104   PetscScalar    **rbuf_a=PETSC_NULL;
2105   Mat_Redundant  *redund=PETSC_NULL;
2106   PetscContainer container;
2107 
2108   PetscFunctionBegin;
2109   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2111 
2112   if (reuse == MAT_REUSE_MATRIX) {
2113     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2114     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2115     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2116     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2117     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2118     if (container) {
2119       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2120     } else {
2121       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2122     }
2123     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2124 
2125     nsends    = redund->nsends;
2126     nrecvs    = redund->nrecvs;
2127     send_rank = redund->send_rank;
2128     recv_rank = redund->recv_rank;
2129     sbuf_nz   = redund->sbuf_nz;
2130     rbuf_nz   = redund->rbuf_nz;
2131     sbuf_j    = redund->sbuf_j;
2132     sbuf_a    = redund->sbuf_a;
2133     rbuf_j    = redund->rbuf_j;
2134     rbuf_a    = redund->rbuf_a;
2135   }
2136 
2137   if (reuse == MAT_INITIAL_MATRIX){
2138     PetscMPIInt  subrank,subsize;
2139     PetscInt     nleftover,np_subcomm;
2140     /* get the destination processors' id send_rank, nsends and nrecvs */
2141     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2142     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2143     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2144     np_subcomm = size/nsubcomm;
2145     nleftover  = size - nsubcomm*np_subcomm;
2146     nsends = 0; nrecvs = 0;
2147     for (i=0; i<size; i++){ /* i=rank*/
2148       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2149         send_rank[nsends] = i; nsends++;
2150         recv_rank[nrecvs++] = i;
2151       }
2152     }
2153     if (rank >= size - nleftover){/* this proc is a leftover processor */
2154       i = size-nleftover-1;
2155       j = 0;
2156       while (j < nsubcomm - nleftover){
2157         send_rank[nsends++] = i;
2158         i--; j++;
2159       }
2160     }
2161 
2162     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2163       for (i=0; i<nleftover; i++){
2164         recv_rank[nrecvs++] = size-nleftover+i;
2165       }
2166     }
2167 
2168     /* allocate sbuf_j, sbuf_a */
2169     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2170     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2171     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2172   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2173 
2174   /* copy mat's local entries into the buffers */
2175   if (reuse == MAT_INITIAL_MATRIX){
2176     rownz_max = 0;
2177     rptr = sbuf_j;
2178     cols = sbuf_j + rend-rstart + 1;
2179     vals = sbuf_a;
2180     rptr[0] = 0;
2181     for (i=0; i<rend-rstart; i++){
2182       row = i + rstart;
2183       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2184       ncols  = nzA + nzB;
2185       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2186       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2187       /* load the column indices for this row into cols */
2188       lwrite = 0;
2189       for (l=0; l<nzB; l++) {
2190         if ((ctmp = bmap[cworkB[l]]) < cstart){
2191           vals[lwrite]   = aworkB[l];
2192           cols[lwrite++] = ctmp;
2193         }
2194       }
2195       for (l=0; l<nzA; l++){
2196         vals[lwrite]   = aworkA[l];
2197         cols[lwrite++] = cstart + cworkA[l];
2198       }
2199       for (l=0; l<nzB; l++) {
2200         if ((ctmp = bmap[cworkB[l]]) >= cend){
2201           vals[lwrite]   = aworkB[l];
2202           cols[lwrite++] = ctmp;
2203         }
2204       }
2205       vals += ncols;
2206       cols += ncols;
2207       rptr[i+1] = rptr[i] + ncols;
2208       if (rownz_max < ncols) rownz_max = ncols;
2209     }
2210     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2211   } else { /* only copy matrix values into sbuf_a */
2212     rptr = sbuf_j;
2213     vals = sbuf_a;
2214     rptr[0] = 0;
2215     for (i=0; i<rend-rstart; i++){
2216       row = i + rstart;
2217       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2218       ncols  = nzA + nzB;
2219       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2220       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2221       lwrite = 0;
2222       for (l=0; l<nzB; l++) {
2223         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2224       }
2225       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2226       for (l=0; l<nzB; l++) {
2227         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2228       }
2229       vals += ncols;
2230       rptr[i+1] = rptr[i] + ncols;
2231     }
2232   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2233 
2234   /* send nzlocal to others, and recv other's nzlocal */
2235   /*--------------------------------------------------*/
2236   if (reuse == MAT_INITIAL_MATRIX){
2237     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2238     s_waits2 = s_waits3 + nsends;
2239     s_waits1 = s_waits2 + nsends;
2240     r_waits1 = s_waits1 + nsends;
2241     r_waits2 = r_waits1 + nrecvs;
2242     r_waits3 = r_waits2 + nrecvs;
2243   } else {
2244     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2245     r_waits3 = s_waits3 + nsends;
2246   }
2247 
2248   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2249   if (reuse == MAT_INITIAL_MATRIX){
2250     /* get new tags to keep the communication clean */
2251     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2252     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2253     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2254 
2255     /* post receives of other's nzlocal */
2256     for (i=0; i<nrecvs; i++){
2257       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2258     }
2259     /* send nzlocal to others */
2260     for (i=0; i<nsends; i++){
2261       sbuf_nz[i] = nzlocal;
2262       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2263     }
2264     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2265     count = nrecvs;
2266     while (count) {
2267       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2268       recv_rank[imdex] = recv_status.MPI_SOURCE;
2269       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2270       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2271 
2272       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2273       rbuf_nz[imdex] += i + 2;
2274       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2275       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2276       count--;
2277     }
2278     /* wait on sends of nzlocal */
2279     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2280     /* send mat->i,j to others, and recv from other's */
2281     /*------------------------------------------------*/
2282     for (i=0; i<nsends; i++){
2283       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2284       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2285     }
2286     /* wait on receives of mat->i,j */
2287     /*------------------------------*/
2288     count = nrecvs;
2289     while (count) {
2290       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2291       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2292       count--;
2293     }
2294     /* wait on sends of mat->i,j */
2295     /*---------------------------*/
2296     if (nsends) {
2297       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2298     }
2299   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2300 
2301   /* post receives, send and receive mat->a */
2302   /*----------------------------------------*/
2303   for (imdex=0; imdex<nrecvs; imdex++) {
2304     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2305   }
2306   for (i=0; i<nsends; i++){
2307     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2308   }
2309   count = nrecvs;
2310   while (count) {
2311     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2312     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2313     count--;
2314   }
2315   if (nsends) {
2316     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2317   }
2318 
2319   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2320 
2321   /* create redundant matrix */
2322   /*-------------------------*/
2323   if (reuse == MAT_INITIAL_MATRIX){
2324     /* compute rownz_max for preallocation */
2325     for (imdex=0; imdex<nrecvs; imdex++){
2326       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2327       rptr = rbuf_j[imdex];
2328       for (i=0; i<j; i++){
2329         ncols = rptr[i+1] - rptr[i];
2330         if (rownz_max < ncols) rownz_max = ncols;
2331       }
2332     }
2333 
2334     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2335     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2336     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2337     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2338     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2339   } else {
2340     C = *matredundant;
2341   }
2342 
2343   /* insert local matrix entries */
2344   rptr = sbuf_j;
2345   cols = sbuf_j + rend-rstart + 1;
2346   vals = sbuf_a;
2347   for (i=0; i<rend-rstart; i++){
2348     row   = i + rstart;
2349     ncols = rptr[i+1] - rptr[i];
2350     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2351     vals += ncols;
2352     cols += ncols;
2353   }
2354   /* insert received matrix entries */
2355   for (imdex=0; imdex<nrecvs; imdex++){
2356     rstart = rowrange[recv_rank[imdex]];
2357     rend   = rowrange[recv_rank[imdex]+1];
2358     rptr = rbuf_j[imdex];
2359     cols = rbuf_j[imdex] + rend-rstart + 1;
2360     vals = rbuf_a[imdex];
2361     for (i=0; i<rend-rstart; i++){
2362       row   = i + rstart;
2363       ncols = rptr[i+1] - rptr[i];
2364       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2365       vals += ncols;
2366       cols += ncols;
2367     }
2368   }
2369   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2372   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2373   if (reuse == MAT_INITIAL_MATRIX){
2374     PetscContainer container;
2375     *matredundant = C;
2376     /* create a supporting struct and attach it to C for reuse */
2377     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2378     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2379     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2380     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2381     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2382 
2383     redund->nzlocal = nzlocal;
2384     redund->nsends  = nsends;
2385     redund->nrecvs  = nrecvs;
2386     redund->send_rank = send_rank;
2387     redund->recv_rank = recv_rank;
2388     redund->sbuf_nz = sbuf_nz;
2389     redund->rbuf_nz = rbuf_nz;
2390     redund->sbuf_j  = sbuf_j;
2391     redund->sbuf_a  = sbuf_a;
2392     redund->rbuf_j  = rbuf_j;
2393     redund->rbuf_a  = rbuf_a;
2394 
2395     redund->MatDestroy = C->ops->destroy;
2396     C->ops->destroy    = MatDestroy_MatRedundant;
2397   }
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
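/*
    Row-wise maximum absolute value: the maxima of the diagonal block A (with indices
    shifted into global numbering) are compared against the maxima of the off-diagonal
    block B (with indices mapped through garray) and the larger entry wins. The
    companion MatGetRowMinAbs_MPIAIJ() below does the same with the smaller entry.
*/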
2403 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2404 {
2405   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2406   PetscErrorCode ierr;
2407   PetscInt       i,*idxb = 0;
2408   PetscScalar    *va,*vb;
2409   Vec            vtmp;
2410 
2411   PetscFunctionBegin;
2412   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2413   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2414   if (idx) {
2415     for (i=0; i<A->rmap->n; i++) {
2416       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2417     }
2418   }
2419 
2420   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2421   if (idx) {
2422     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2423   }
2424   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2425   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2426 
2427   for (i=0; i<A->rmap->n; i++){
2428     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2429       va[i] = vb[i];
2430       if (idx) idx[i] = a->garray[idxb[i]];
2431     }
2432   }
2433 
2434   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2435   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2436   if (idxb) {
2437     ierr = PetscFree(idxb);CHKERRQ(ierr);
2438   }
2439   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2445 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2446 {
2447   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2448   PetscErrorCode ierr;
2449   PetscInt       i,*idxb = 0;
2450   PetscScalar    *va,*vb;
2451   Vec            vtmp;
2452 
2453   PetscFunctionBegin;
2454   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2455   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2456   if (idx) {
2457     for (i=0; i<A->rmap->n; i++) {
2458       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2459     }
2460   }
2461 
2462   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2463   if (idx) {
2464     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2465   }
2466   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2467   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2468 
2469   for (i=0; i<A->rmap->n; i++){
2470     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2471       va[i] = vb[i];
2472       if (idx) idx[i] = a->garray[idxb[i]];
2473     }
2474   }
2475 
2476   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2477   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2478   if (idxb) {
2479     ierr = PetscFree(idxb);CHKERRQ(ierr);
2480   }
2481   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2487 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2488 {
2489   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2490   PetscInt       n      = A->rmap->n;
2491   PetscInt       cstart = A->cmap->rstart;
2492   PetscInt      *cmap   = mat->garray;
2493   PetscInt      *diagIdx, *offdiagIdx;
2494   Vec            diagV, offdiagV;
2495   PetscScalar   *a, *diagA, *offdiagA;
2496   PetscInt       r;
2497   PetscErrorCode ierr;
2498 
2499   PetscFunctionBegin;
2500   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2501   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2502   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2503   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2504   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2505   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2506   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2507   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2508   for(r = 0; r < n; ++r) {
2509     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2510       a[r]   = diagA[r];
2511       idx[r] = cstart + diagIdx[r];
2512     } else {
2513       a[r]   = offdiagA[r];
2514       idx[r] = cmap[offdiagIdx[r]];
2515     }
2516   }
2517   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2518   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2519   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2520   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2521   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2522   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2528 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2529 {
2530   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2531   PetscInt       n      = A->rmap->n;
2532   PetscInt       cstart = A->cmap->rstart;
2533   PetscInt      *cmap   = mat->garray;
2534   PetscInt      *diagIdx, *offdiagIdx;
2535   Vec            diagV, offdiagV;
2536   PetscScalar   *a, *diagA, *offdiagA;
2537   PetscInt       r;
2538   PetscErrorCode ierr;
2539 
2540   PetscFunctionBegin;
2541   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2542   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2543   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2544   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2545   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2546   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2547   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2548   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2549   for(r = 0; r < n; ++r) {
2550     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2551       a[r]   = diagA[r];
2552       idx[r] = cstart + diagIdx[r];
2553     } else {
2554       a[r]   = offdiagA[r];
2555       idx[r] = cmap[offdiagIdx[r]];
2556     }
2557   }
2558   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2559   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2560   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2561   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2562   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2563   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2569 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2570 {
2571   PetscErrorCode ierr;
2572   Mat            *dummy;
2573 
2574   PetscFunctionBegin;
2575   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2576   *newmat = *dummy;
2577   ierr = PetscFree(dummy);CHKERRQ(ierr);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2582 /* -------------------------------------------------------------------*/
2583 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2584        MatGetRow_MPIAIJ,
2585        MatRestoreRow_MPIAIJ,
2586        MatMult_MPIAIJ,
2587 /* 4*/ MatMultAdd_MPIAIJ,
2588        MatMultTranspose_MPIAIJ,
2589        MatMultTransposeAdd_MPIAIJ,
2590 #ifdef PETSC_HAVE_PBGL
2591        MatSolve_MPIAIJ,
2592 #else
2593        0,
2594 #endif
2595        0,
2596        0,
2597 /*10*/ 0,
2598        0,
2599        0,
2600        MatSOR_MPIAIJ,
2601        MatTranspose_MPIAIJ,
2602 /*15*/ MatGetInfo_MPIAIJ,
2603        MatEqual_MPIAIJ,
2604        MatGetDiagonal_MPIAIJ,
2605        MatDiagonalScale_MPIAIJ,
2606        MatNorm_MPIAIJ,
2607 /*20*/ MatAssemblyBegin_MPIAIJ,
2608        MatAssemblyEnd_MPIAIJ,
2609        MatSetOption_MPIAIJ,
2610        MatZeroEntries_MPIAIJ,
2611 /*24*/ MatZeroRows_MPIAIJ,
2612        0,
2613 #ifdef PETSC_HAVE_PBGL
2614        0,
2615 #else
2616        0,
2617 #endif
2618        0,
2619        0,
2620 /*29*/ MatSetUpPreallocation_MPIAIJ,
2621 #ifdef PETSC_HAVE_PBGL
2622        0,
2623 #else
2624        0,
2625 #endif
2626        0,
2627        0,
2628        0,
2629 /*34*/ MatDuplicate_MPIAIJ,
2630        0,
2631        0,
2632        0,
2633        0,
2634 /*39*/ MatAXPY_MPIAIJ,
2635        MatGetSubMatrices_MPIAIJ,
2636        MatIncreaseOverlap_MPIAIJ,
2637        MatGetValues_MPIAIJ,
2638        MatCopy_MPIAIJ,
2639 /*44*/ MatGetRowMax_MPIAIJ,
2640        MatScale_MPIAIJ,
2641        0,
2642        0,
2643        0,
2644 /*49*/ MatSetBlockSize_MPIAIJ,
2645        0,
2646        0,
2647        0,
2648        0,
2649 /*54*/ MatFDColoringCreate_MPIAIJ,
2650        0,
2651        MatSetUnfactored_MPIAIJ,
2652        MatPermute_MPIAIJ,
2653        0,
2654 /*59*/ MatGetSubMatrix_MPIAIJ,
2655        MatDestroy_MPIAIJ,
2656        MatView_MPIAIJ,
2657        0,
2658        0,
2659 /*64*/ 0,
2660        0,
2661        0,
2662        0,
2663        0,
2664 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2665        MatGetRowMinAbs_MPIAIJ,
2666        0,
2667        MatSetColoring_MPIAIJ,
2668 #if defined(PETSC_HAVE_ADIC)
2669        MatSetValuesAdic_MPIAIJ,
2670 #else
2671        0,
2672 #endif
2673        MatSetValuesAdifor_MPIAIJ,
2674 /*75*/ MatFDColoringApply_AIJ,
2675        0,
2676        0,
2677        0,
2678        0,
2679 /*80*/ 0,
2680        0,
2681        0,
2682 /*83*/ MatLoad_MPIAIJ,
2683        0,
2684        0,
2685        0,
2686        0,
2687        0,
2688 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2689        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2690        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2691        MatPtAP_Basic,
2692        MatPtAPSymbolic_MPIAIJ,
2693 /*94*/ MatPtAPNumeric_MPIAIJ,
2694        0,
2695        0,
2696        0,
2697        0,
2698 /*99*/ 0,
2699        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2700        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2701        MatConjugate_MPIAIJ,
2702        0,
2703 /*104*/MatSetValuesRow_MPIAIJ,
2704        MatRealPart_MPIAIJ,
2705        MatImaginaryPart_MPIAIJ,
2706        0,
2707        0,
2708 /*109*/0,
2709        MatGetRedundantMatrix_MPIAIJ,
2710        MatGetRowMin_MPIAIJ,
2711        0,
2712        0,
2713 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2714        0,
2715        0,
2716        0,
2717        0,
2718 /*119*/0,
2719        0,
2720        0,
2721        0
2722 };
2723 
2724 /* ----------------------------------------------------------------------------------------*/
2725 
2726 EXTERN_C_BEGIN
2727 #undef __FUNCT__
2728 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2729 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2730 {
2731   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2732   PetscErrorCode ierr;
2733 
2734   PetscFunctionBegin;
2735   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2736   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2737   PetscFunctionReturn(0);
2738 }
2739 EXTERN_C_END
2740 
2741 EXTERN_C_BEGIN
2742 #undef __FUNCT__
2743 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2744 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2745 {
2746   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2747   PetscErrorCode ierr;
2748 
2749   PetscFunctionBegin;
2750   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2751   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2752   PetscFunctionReturn(0);
2753 }
2754 EXTERN_C_END
2755 
2756 EXTERN_C_BEGIN
2757 #undef __FUNCT__
2758 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2759 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2760 {
2761   Mat_MPIAIJ     *b;
2762   PetscErrorCode ierr;
2763   PetscInt       i;
2764 
2765   PetscFunctionBegin;
2766   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2767   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2768   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2769   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2770 
2771   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2772   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2773   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2774   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2775   if (d_nnz) {
2776     for (i=0; i<B->rmap->n; i++) {
2777       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2778     }
2779   }
2780   if (o_nnz) {
2781     for (i=0; i<B->rmap->n; i++) {
2782       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2783     }
2784   }
2785   b = (Mat_MPIAIJ*)B->data;
2786 
2787   if (!B->preallocated) {
2788     /* Explicitly create 2 MATSEQAIJ matrices. */
2789     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2790     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2791     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2792     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2793     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2794     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2795     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2796     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2797   }
2798 
2799   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2800   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2801   B->preallocated = PETSC_TRUE;
2802   PetscFunctionReturn(0);
2803 }
2804 EXTERN_C_END
2805 
2806 #undef __FUNCT__
2807 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2808 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2809 {
2810   Mat            mat;
2811   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2812   PetscErrorCode ierr;
2813 
2814   PetscFunctionBegin;
2815   *newmat       = 0;
2816   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2817   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2818   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2819   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2820   a    = (Mat_MPIAIJ*)mat->data;
2821 
2822   mat->factortype    = matin->factortype;
2823   mat->rmap->bs      = matin->rmap->bs;
2824   mat->assembled    = PETSC_TRUE;
2825   mat->insertmode   = NOT_SET_VALUES;
2826   mat->preallocated = PETSC_TRUE;
2827 
2828   a->size           = oldmat->size;
2829   a->rank           = oldmat->rank;
2830   a->donotstash     = oldmat->donotstash;
2831   a->roworiented    = oldmat->roworiented;
2832   a->rowindices     = 0;
2833   a->rowvalues      = 0;
2834   a->getrowactive   = PETSC_FALSE;
2835 
2836   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2837   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2838 
2839   if (oldmat->colmap) {
2840 #if defined (PETSC_USE_CTABLE)
2841     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2842 #else
2843     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2844     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2845     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2846 #endif
2847   } else a->colmap = 0;
2848   if (oldmat->garray) {
2849     PetscInt len;
2850     len  = oldmat->B->cmap->n;
2851     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2852     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2853     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2854   } else a->garray = 0;
2855 
2856   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2857   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2858   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2859   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2860   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2861   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2862   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2863   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2864   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2865   *newmat = mat;
2866   PetscFunctionReturn(0);
2867 }
2868 
2869 #undef __FUNCT__
2870 #define __FUNCT__ "MatLoad_MPIAIJ"
2871 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2872 {
2873   PetscScalar    *vals,*svals;
2874   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2875   MPI_Status     status;
2876   PetscErrorCode ierr;
2877   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,mpicnt,mpimaxnz;
2878   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2879   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2880   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2881   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2882   int            fd;
2883 
2884   PetscFunctionBegin;
2885   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2886   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2887   if (!rank) {
2888     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2889     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2890     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2891   }
2892 
2893   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2894 
2895   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2896   M = header[1]; N = header[2];
2897   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2898   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2899   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2900 
2901   /* If global sizes are set, check if they are consistent with that given in the file */
2902   if (sizesset) {
2903     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2904   }
2905   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2906   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2907 
2908   /* determine ownership of all rows */
2909   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2910   else m = newMat->rmap->n; /* Set by user */
2911 
2912   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2913   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2914 
2915   /* First process needs enough room for process with most rows */
2916   if (!rank) {
2917     mmax       = rowners[1];
2918     for (i=2; i<=size; i++) {
2919       mmax = PetscMax(mmax,rowners[i]);
2920     }
2921   } else mmax = m;
2922 
2923   rowners[0] = 0;
2924   for (i=2; i<=size; i++) {
2925     rowners[i] += rowners[i-1];
2926   }
2927   rstart = rowners[rank];
2928   rend   = rowners[rank+1];
2929 
2930   /* distribute row lengths to all processors */
2931   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2932   if (!rank) {
2933     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2934     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2935     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2936     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2937     for (j=0; j<m; j++) {
2938       procsnz[0] += ourlens[j];
2939     }
2940     for (i=1; i<size; i++) {
2941       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2942       /* calculate the number of nonzeros on each processor */
2943       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2944         procsnz[i] += rowlengths[j];
2945       }
2946       mpicnt = PetscMPIIntCast(rowners[i+1]-rowners[i]);
2947       ierr   = MPI_Send(rowlengths,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2948     }
2949     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2950   } else {
2951     mpicnt = PetscMPIIntCast(m);
2952     ierr   = MPI_Recv(ourlens,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2953   }
2954 
2955   if (!rank) {
2956     /* determine max buffer needed and allocate it */
2957     maxnz = 0;
2958     for (i=0; i<size; i++) {
2959       maxnz = PetscMax(maxnz,procsnz[i]);
2960     }
2961     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2962 
2963     /* read in my part of the matrix column indices  */
2964     nz   = procsnz[0];
2965     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2966     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2967 
2968     /* read in everyone else's parts and ship them off */
2969     for (i=1; i<size; i++) {
2970       nz     = procsnz[i];
2971       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2972       mpicnt = PetscMPIIntCast(nz);
2973       ierr   = MPI_Send(cols,mpicnt,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2974     }
2975     ierr = PetscFree(cols);CHKERRQ(ierr);
2976   } else {
2977     /* determine buffer space needed for message */
2978     nz = 0;
2979     for (i=0; i<m; i++) {
2980       nz += ourlens[i];
2981     }
2982     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2983 
2984     /* receive message of column indices*/
2985     mpicnt = PetscMPIIntCast(nz);
2986     ierr = MPI_Recv(mycols,mpicnt,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2987     ierr = MPI_Get_count(&status,MPIU_INT,&mpimaxnz);CHKERRQ(ierr);
2988     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
2989     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
2990     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
2991   }
2992 
2993   /* determine column ownership if matrix is not square */
2994   if (N != M) {
2995     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
2996     else n = newMat->cmap->n;
2997     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2998     cstart = cend - n;
2999   } else {
3000     cstart = rstart;
3001     cend   = rend;
3002     n      = cend - cstart;
3003   }
3004 
3005   /* loop over local rows, determining number of off diagonal entries */
3006   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3007   jj = 0;
3008   for (i=0; i<m; i++) {
3009     for (j=0; j<ourlens[i]; j++) {
3010       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3011       jj++;
3012     }
3013   }
3014 
3015   for (i=0; i<m; i++) {
3016     ourlens[i] -= offlens[i];
3017   }
3018   if (!sizesset) {
3019     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3020   }
3021   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3022 
3023   for (i=0; i<m; i++) {
3024     ourlens[i] += offlens[i];
3025   }
3026 
3027   if (!rank) {
3028     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3029 
3030     /* read in my part of the matrix numerical values  */
3031     nz   = procsnz[0];
3032     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3033 
3034     /* insert into matrix */
3035     jj      = rstart;
3036     smycols = mycols;
3037     svals   = vals;
3038     for (i=0; i<m; i++) {
3039       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3040       smycols += ourlens[i];
3041       svals   += ourlens[i];
3042       jj++;
3043     }
3044 
3045     /* read in other processors and ship out */
3046     for (i=1; i<size; i++) {
3047       nz     = procsnz[i];
3048       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3049       mpicnt = PetscMPIIntCast(nz);
3050       ierr   = MPI_Send(vals,mpicnt,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3051     }
3052     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3053   } else {
3054     /* receive numeric values */
3055     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3056 
3057     /* receive message of values*/
3058     mpicnt = PetscMPIIntCast(nz);
3059     ierr   = MPI_Recv(vals,mpicnt,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm,&status);CHKERRQ(ierr);
3060     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&mpimaxnz);CHKERRQ(ierr);
3061     if (mpimaxnz == MPI_UNDEFINED) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned MPI_UNDEFINED, expected %d",mpicnt);
3062     else if (mpimaxnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Get_count() returned impossible negative value %d, expected %d",mpimaxnz,mpicnt);
3063     else if (mpimaxnz != mpicnt) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file: expected %d received %d",mpicnt,mpimaxnz);
3064 
3065     /* insert into matrix */
3066     jj      = rstart;
3067     smycols = mycols;
3068     svals   = vals;
3069     for (i=0; i<m; i++) {
3070       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3071       smycols += ourlens[i];
3072       svals   += ourlens[i];
3073       jj++;
3074     }
3075   }
3076   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3077   ierr = PetscFree(vals);CHKERRQ(ierr);
3078   ierr = PetscFree(mycols);CHKERRQ(ierr);
3079   ierr = PetscFree(rowners);CHKERRQ(ierr);
3080 
3081   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3082   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3083   PetscFunctionReturn(0);
3084 }
3085 
3086 #undef __FUNCT__
3087 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3088 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3089 {
3090   PetscErrorCode ierr;
3091   IS             iscol_local;
3092   PetscInt       csize;
3093 
3094   PetscFunctionBegin;
3095   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3096   if (call == MAT_REUSE_MATRIX) {
3097     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3098     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3099   } else {
3100     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3101   }
3102   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3103   if (call == MAT_INITIAL_MATRIX) {
3104     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3105     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3106   }
3107   PetscFunctionReturn(0);
3108 }
3109 
3110 #undef __FUNCT__
3111 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3112 /*
3113     Not great since it makes two copies of the submatrix: first a SeqAIJ
3114   matrix on each process, and then the final result by concatenating the local matrices.
3115   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3116 
3117   Note: This requires a sequential iscol with all indices.
3118 */
3119 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3120 {
3121   PetscErrorCode ierr;
3122   PetscMPIInt    rank,size;
3123   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3124   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3125   Mat            *local,M,Mreuse;
3126   MatScalar      *vwork,*aa;
3127   MPI_Comm       comm = ((PetscObject)mat)->comm;
3128   Mat_SeqAIJ     *aij;
3129 
3130 
3131   PetscFunctionBegin;
3132   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3133   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3134 
3135   if (call ==  MAT_REUSE_MATRIX) {
3136     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3137     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3138     local = &Mreuse;
3139     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3140   } else {
3141     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3142     Mreuse = *local;
3143     ierr   = PetscFree(local);CHKERRQ(ierr);
3144   }
3145 
3146   /*
3147       m - number of local rows
3148       n - number of columns (same on all processors)
3149       rstart - first row in new global matrix generated
3150   */
3151   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3152   if (call == MAT_INITIAL_MATRIX) {
3153     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3154     ii  = aij->i;
3155     jj  = aij->j;
3156 
3157     /*
3158         Determine the number of non-zeros in the diagonal and off-diagonal
3159         portions of the matrix in order to do correct preallocation
3160     */
3161 
3162     /* first get start and end of "diagonal" columns */
3163     if (csize == PETSC_DECIDE) {
3164       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3165       if (mglobal == n) { /* square matrix */
3166 	nlocal = m;
3167       } else {
3168         nlocal = n/size + ((n % size) > rank);
3169       }
3170     } else {
3171       nlocal = csize;
3172     }
3173     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3174     rstart = rend - nlocal;
3175     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3176 
3177     /* next, compute all the lengths */
3178     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3179     olens = dlens + m;
3180     for (i=0; i<m; i++) {
3181       jend = ii[i+1] - ii[i];
3182       olen = 0;
3183       dlen = 0;
3184       for (j=0; j<jend; j++) {
3185         if (*jj < rstart || *jj >= rend) olen++;
3186         else dlen++;
3187         jj++;
3188       }
3189       olens[i] = olen;
3190       dlens[i] = dlen;
3191     }
3192     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3193     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3194     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3195     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3196     ierr = PetscFree(dlens);CHKERRQ(ierr);
3197   } else {
3198     PetscInt ml,nl;
3199 
3200     M = *newmat;
3201     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3202     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3203     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3204     /*
3205          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3206        rather than the slower MatSetValues().
3207     */
3208     M->was_assembled = PETSC_TRUE;
3209     M->assembled     = PETSC_FALSE;
3210   }
3211   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3212   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3213   ii  = aij->i;
3214   jj  = aij->j;
3215   aa  = aij->a;
3216   for (i=0; i<m; i++) {
3217     row   = rstart + i;
3218     nz    = ii[i+1] - ii[i];
3219     cwork = jj;     jj += nz;
3220     vwork = aa;     aa += nz;
3221     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3222   }
3223 
3224   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3225   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3226   *newmat = M;
3227 
3228   /* save submatrix used in processor for next request */
3229   if (call ==  MAT_INITIAL_MATRIX) {
3230     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3231     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3232   }
3233 
3234   PetscFunctionReturn(0);
3235 }
3236 
3237 EXTERN_C_BEGIN
3238 #undef __FUNCT__
3239 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3240 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3241 {
3242   PetscInt       m,cstart, cend,j,nnz,i,d;
3243   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3244   const PetscInt *JJ;
3245   PetscScalar    *values;
3246   PetscErrorCode ierr;
3247 
3248   PetscFunctionBegin;
3249   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3250 
3251   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3252   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3253   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3254   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3255   m      = B->rmap->n;
3256   cstart = B->cmap->rstart;
3257   cend   = B->cmap->rend;
3258   rstart = B->rmap->rstart;
3259 
3260   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3261 
3262 #if defined(PETSC_USE_DEBUGGING)
3263   for (i=0; i<m; i++) {
3264     nnz     = Ii[i+1]- Ii[i];
3265     JJ      = J + Ii[i];
3266     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2867     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
2868     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3269   }
3270 #endif
3271 
3272   for (i=0; i<m; i++) {
3273     nnz     = Ii[i+1]- Ii[i];
3274     JJ      = J + Ii[i];
3275     nnz_max = PetscMax(nnz_max,nnz);
3276     d       = 0;
3277     for (j=0; j<nnz; j++) {
3278       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3279     }
3280     d_nnz[i] = d;
3281     o_nnz[i] = nnz - d;
3282   }
3283   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3284   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3285 
3286   if (v) values = (PetscScalar*)v;
3287   else {
3288     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3289     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3290   }
3291 
3292   for (i=0; i<m; i++) {
3293     ii   = i + rstart;
3294     nnz  = Ii[i+1]- Ii[i];
3295     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3296   }
3297   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3298   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3299 
3300   if (!v) {
3301     ierr = PetscFree(values);CHKERRQ(ierr);
3302   }
3303   PetscFunctionReturn(0);
3304 }
3305 EXTERN_C_END
3306 
3307 #undef __FUNCT__
3308 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3309 /*@
3310    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3311    (the default parallel PETSc format).
3312 
3313    Collective on MPI_Comm
3314 
3315    Input Parameters:
3316 +  B - the matrix
3317 .  i - the indices into j for the start of each local row (starts with zero)
3318 .  j - the column indices for each local row (starts with zero)
3319 -  v - optional values in the matrix
3320 
3321    Level: developer
3322 
3323    Notes:
3324        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3325      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3326      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3327 
3328        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3329 
3330        The format used for the sparse matrix input is equivalent to a
3331     row-major ordering, i.e., for the following matrix, the input data expected is
3332     as shown:
3333 
3334         1 0 0
3335         2 0 3     P0
3336        -------
3337         4 5 6     P1
3338 
3339      Process0 [P0]: rows_owned=[0,1]
3340         i =  {0,1,3}  [size = nrow+1  = 2+1]
3341         j =  {0,0,2}  [size = local nz = 3]
3342         v =  {1,2,3}  [size = local nz = 3]
3343 
3344      Process1 [P1]: rows_owned=[2]
3345         i =  {0,3}    [size = nrow+1  = 1+1]
3346         j =  {0,1,2}  [size = local nz = 3]
3347         v =  {4,5,6}  [size = local nz = 3]
3348 
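     A minimal calling sketch for the two-process layout above (illustrative only:
     the variable names are arbitrary, rank is assumed to hold the MPI rank, and
     error checking is omitted):
.vb
     Mat         B;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   /* process 0: rows 0-1 */
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   /* process 1: row  2   */
     PetscScalar v0[] = {1,2,3}, v1[] = {4,5,6};

     MatCreate(PETSC_COMM_WORLD,&B);
     MatSetSizes(B,rank ? 1 : 2,PETSC_DECIDE,3,3);
     MatSetType(B,MATMPIAIJ);
     if (!rank) MatMPIAIJSetPreallocationCSR(B,i0,j0,v0);
     else       MatMPIAIJSetPreallocationCSR(B,i1,j1,v1);
.ve
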
3349 .keywords: matrix, aij, compressed row, sparse, parallel
3350 
3351 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3352           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3353 @*/
3354 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3355 {
3356   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
3357 
3358   PetscFunctionBegin;
3359   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3360   if (f) {
3361     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
3362   }
3363   PetscFunctionReturn(0);
3364 }
3365 
3366 #undef __FUNCT__
3367 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3368 /*@C
3369    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3370    (the default parallel PETSc format).  For good matrix assembly performance
3371    the user should preallocate the matrix storage by setting the parameters
3372    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3373    performance can be increased by more than a factor of 50.
3374 
3375    Collective on MPI_Comm
3376 
3377    Input Parameters:
3378 +  A - the matrix
3379 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3380            (same value is used for all local rows)
3381 .  d_nnz - array containing the number of nonzeros in the various rows of the
3382            DIAGONAL portion of the local submatrix (possibly different for each row)
3383            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3384            The size of this array is equal to the number of local rows, i.e 'm'.
3385            You must leave room for the diagonal entry even if it is zero.
3386 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3387            submatrix (same value is used for all local rows).
3388 -  o_nnz - array containing the number of nonzeros in the various rows of the
3389            OFF-DIAGONAL portion of the local submatrix (possibly different for
3390            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3391            structure. The size of this array is equal to the number
3392            of local rows, i.e 'm'.
3393 
3394    If the *_nnz parameter is given then the *_nz parameter is ignored
3395 
3396    The AIJ format (also called the Yale sparse matrix format or
3397    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3398    storage.  The stored row and column indices begin with zero.  See the users manual for details.
3399 
3400    The parallel matrix is partitioned such that the first m0 rows belong to
3401    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3402    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3403 
3404    The DIAGONAL portion of the local submatrix of a processor can be defined
3405    as the submatrix which is obtained by extracting the part corresponding
3406    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3407    first row that belongs to the processor, and r2 is the last row belonging
3408    to this processor. This is a square mxm matrix. The remaining portion
3409    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3410 
3411    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3412 
3413    You can call MatGetInfo() to get information on how effective the preallocation was;
3414    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3415    You can also run with the option -info and look for messages with the string
3416    malloc in them to see if additional memory allocation was needed.
3417 
3418    Example usage:
3419 
3420    Consider the following 8x8 matrix with 34 non-zero values, that is
3421    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3422    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3423    as follows:
3424 
3425 .vb
3426             1  2  0  |  0  3  0  |  0  4
3427     Proc0   0  5  6  |  7  0  0  |  8  0
3428             9  0 10  | 11  0  0  | 12  0
3429     -------------------------------------
3430            13  0 14  | 15 16 17  |  0  0
3431     Proc1   0 18  0  | 19 20 21  |  0  0
3432             0  0  0  | 22 23  0  | 24  0
3433     -------------------------------------
3434     Proc2  25 26 27  |  0  0 28  | 29  0
3435            30  0  0  | 31 32 33  |  0 34
3436 .ve
3437 
3438    This can be represented as a collection of submatrices as:
3439 
3440 .vb
3441       A B C
3442       D E F
3443       G H I
3444 .ve
3445 
3446    Where the submatrices A,B,C are owned by proc0, D,E,F are
3447    owned by proc1, G,H,I are owned by proc2.
3448 
3449    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3450    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3451    The 'M','N' parameters are 8,8, and have the same values on all procs.
3452 
3453    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3454    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3455    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3456    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3457    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3458    matrix, and [DF] as another SeqAIJ matrix.
3459 
3460    When d_nz, o_nz parameters are specified, d_nz storage elements are
3461    allocated for every row of the local diagonal submatrix, and o_nz
3462    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3463    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3464    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3465    In this case, the values of d_nz,o_nz are:
3466 .vb
3467      proc0 : dnz = 2, o_nz = 2
3468      proc1 : dnz = 3, o_nz = 2
3469      proc2 : dnz = 1, o_nz = 4
3470 .ve
3471    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3472    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3473    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3474    34 values.
3475 
3476    When d_nnz, o_nnz parameters are specified, the storage is specified
3477    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3478    In the above case the values for d_nnz,o_nnz are:
3479 .vb
3480      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3481      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3482      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3483 .ve
3484    Here the space allocated is the sum of all the above values, i.e., 34, and
3485    hence the preallocation is perfect.
3486 
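   A sketch of the corresponding per-process preallocation calls (illustrative only:
   A is assumed to have already been created with type MPIAIJ and the local sizes
   given above, rank is assumed to hold the MPI rank, and error checking is omitted):
.vb
     PetscInt d_nnz0[] = {2,2,2}, o_nnz0[] = {2,2,2};   /* proc0 */
     PetscInt d_nnz1[] = {3,3,2}, o_nnz1[] = {2,1,1};   /* proc1 */
     PetscInt d_nnz2[] = {1,1},   o_nnz2[] = {4,4};     /* proc2 */

     if (rank == 0)      MatMPIAIJSetPreallocation(A,0,d_nnz0,0,o_nnz0);
     else if (rank == 1) MatMPIAIJSetPreallocation(A,0,d_nnz1,0,o_nnz1);
     else                MatMPIAIJSetPreallocation(A,0,d_nnz2,0,o_nnz2);
.ve
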
3487    Level: intermediate
3488 
3489 .keywords: matrix, aij, compressed row, sparse, parallel
3490 
3491 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3492           MPIAIJ, MatGetInfo()
3493 @*/
3494 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3495 {
3496   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3497 
3498   PetscFunctionBegin;
3499   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3500   if (f) {
3501     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3502   }
3503   PetscFunctionReturn(0);
3504 }
3505 
3506 #undef __FUNCT__
3507 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3508 /*@
3509      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain
3510          the local rows in standard CSR format.
3511 
3512    Collective on MPI_Comm
3513 
3514    Input Parameters:
3515 +  comm - MPI communicator
3516 .  m - number of local rows (Cannot be PETSC_DECIDE)
3517 .  n - This value should be the same as the local size used in creating the
3518        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3519        calculated if N is given) For square matrices n is almost always m.
3520 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3521 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3522 .   i - row indices
3523 .   j - column indices
3524 -   a - matrix values
3525 
3526    Output Parameter:
3527 .   mat - the matrix
3528 
3529    Level: intermediate
3530 
3531    Notes:
3532        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3533      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3534      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3535 
3536        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3537 
3538        The format used for the sparse matrix input is equivalent to a
3539     row-major ordering, i.e., for the following matrix, the input data expected is
3540     as shown:
3541 
3542         1 0 0
3543         2 0 3     P0
3544        -------
3545         4 5 6     P1
3546 
3547      Process0 [P0]: rows_owned=[0,1]
3548         i =  {0,1,3}  [size = nrow+1  = 2+1]
3549         j =  {0,0,2}  [size = local nz = 3]
3550         v =  {1,2,3}  [size = local nz = 3]
3551 
3552      Process1 [P1]: rows_owned=[2]
3553         i =  {0,3}    [size = nrow+1  = 1+1]
3554         j =  {0,1,2}  [size = local nz = 3]
3555         v =  {4,5,6}  [size = local nz = 3]
3556 
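     A sketch of the corresponding calls for this layout (illustrative only: rank is
     assumed to hold the MPI rank and error checking is omitted):
.vb
     Mat         mat;
     PetscInt    i0[] = {0,1,3}, j0[] = {0,0,2};   PetscScalar v0[] = {1,2,3};  /* rank 0 */
     PetscInt    i1[] = {0,3},   j1[] = {0,1,2};   PetscScalar v1[] = {4,5,6};  /* rank 1 */

     if (!rank) MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i0,j0,v0,&mat);
     else       MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i1,j1,v1,&mat);
.ve
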
3557 .keywords: matrix, aij, compressed row, sparse, parallel
3558 
3559 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3560           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3561 @*/
3562 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3563 {
3564   PetscErrorCode ierr;
3565 
3566   PetscFunctionBegin;
3567   if (i[0]) {
3568     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3569   }
3570   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3571   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3572   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3573   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3574   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3575   PetscFunctionReturn(0);
3576 }
3577 
3578 #undef __FUNCT__
3579 #define __FUNCT__ "MatCreateMPIAIJ"
3580 /*@C
3581    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3582    (the default parallel PETSc format).  For good matrix assembly performance
3583    the user should preallocate the matrix storage by setting the parameters
3584    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3585    performance can be increased by more than a factor of 50.
3586 
3587    Collective on MPI_Comm
3588 
3589    Input Parameters:
3590 +  comm - MPI communicator
3591 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3592            This value should be the same as the local size used in creating the
3593            y vector for the matrix-vector product y = Ax.
3594 .  n - This value should be the same as the local size used in creating the
3595        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3596        calculated if N is given) For square matrices n is almost always m.
3597 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3598 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3599 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3600            (same value is used for all local rows)
3601 .  d_nnz - array containing the number of nonzeros in the various rows of the
3602            DIAGONAL portion of the local submatrix (possibly different for each row)
3603            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3604            The size of this array is equal to the number of local rows, i.e 'm'.
3605            You must leave room for the diagonal entry even if it is zero.
3606 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3607            submatrix (same value is used for all local rows).
3608 -  o_nnz - array containing the number of nonzeros in the various rows of the
3609            OFF-DIAGONAL portion of the local submatrix (possibly different for
3610            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3611            structure. The size of this array is equal to the number
3612            of local rows, i.e 'm'.
3613 
3614    Output Parameter:
3615 .  A - the matrix
3616 
3617    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3618    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3619    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3620 
3621    Notes:
3622    If the *_nnz parameter is given then the *_nz parameter is ignored
3623 
3624    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3625    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3626    storage requirements for this matrix.
3627 
3628    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3629    processor then it must be used on all processors that share the object for
3630    that argument.
3631 
3632    The user MUST specify either the local or global matrix dimensions
3633    (possibly both).
3634 
3635    The parallel matrix is partitioned across processors such that the
3636    first m0 rows belong to process 0, the next m1 rows belong to
3637    process 1, the next m2 rows belong to process 2 etc.. where
3638    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3639    values corresponding to an [m x N] submatrix.
3640 
3641    The columns are logically partitioned with the n0 columns belonging
3642    to 0th partition, the next n1 columns belonging to the next
3643    partition etc., where n0,n1,n2... are the input parameter 'n'.
3644 
3645    The DIAGONAL portion of the local submatrix on any given processor
3646    is the submatrix corresponding to the rows and columns m,n
3647    owned by the given processor, i.e. the diagonal matrix on
3648    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3649    etc. The remaining portion of the local submatrix [m x (N-n)]
3650    constitute the OFF-DIAGONAL portion. The example below better
3651    illustrates this concept.
3652 
3653    For a square global matrix we define each processor's diagonal portion
3654    to be its local rows and the corresponding columns (a square submatrix);
3655    each processor's off-diagonal portion encompasses the remainder of the
3656    local matrix (a rectangular submatrix).
3657 
3658    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3659 
3660    When calling this routine with a single process communicator, a matrix of
3661    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3662    type of communicator, use the construction mechanism:
3663      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3664 
3665    By default, this format uses inodes (identical nodes) when possible.
3666    We search for consecutive rows with the same nonzero structure, thereby
3667    reusing matrix information to achieve increased efficiency.
3668 
3669    Options Database Keys:
3670 +  -mat_no_inode  - Do not use inodes
3671 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3672 -  -mat_aij_oneindex - Internally use indexing starting at 1
3673         rather than 0.  Note that when calling MatSetValues(),
3674         the user still MUST index entries starting at 0!
3675 
3676 
3677    Example usage:
3678 
3679    Consider the following 8x8 matrix with 34 non-zero values, that is
3680    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3681    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3682    as follows:
3683 
3684 .vb
3685             1  2  0  |  0  3  0  |  0  4
3686     Proc0   0  5  6  |  7  0  0  |  8  0
3687             9  0 10  | 11  0  0  | 12  0
3688     -------------------------------------
3689            13  0 14  | 15 16 17  |  0  0
3690     Proc1   0 18  0  | 19 20 21  |  0  0
3691             0  0  0  | 22 23  0  | 24  0
3692     -------------------------------------
3693     Proc2  25 26 27  |  0  0 28  | 29  0
3694            30  0  0  | 31 32 33  |  0 34
3695 .ve
3696 
3697    This can be represented as a collection of submatrices as:
3698 
3699 .vb
3700       A B C
3701       D E F
3702       G H I
3703 .ve
3704 
3705    Where the submatrices A,B,C are owned by proc0, D,E,F are
3706    owned by proc1, G,H,I are owned by proc2.
3707 
3708    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3709    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3710    The 'M','N' parameters are 8,8, and have the same values on all procs.
3711 
3712    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3713    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3714    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3715    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3716    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3717    matrix, and [DF] as another SeqAIJ matrix.
3718 
3719    When d_nz, o_nz parameters are specified, d_nz storage elements are
3720    allocated for every row of the local diagonal submatrix, and o_nz
3721    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3722    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3723    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3724    In this case, the values of d_nz,o_nz are:
3725 .vb
3726      proc0 : dnz = 2, o_nz = 2
3727      proc1 : dnz = 3, o_nz = 2
3728      proc2 : dnz = 1, o_nz = 4
3729 .ve
3730    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3731    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3732    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3733    34 values.
3734 
3735    When d_nnz, o_nnz parameters are specified, the storage is specified
3736    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3737    In the above case the values for d_nnz,o_nnz are:
3738 .vb
3739      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3740      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3741      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3742 .ve
3743    Here the space allocated is the sum of all the above values, i.e., 34, and
3744    hence the preallocation is perfect.
3745 
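   A per-process creation sketch for the example above (illustrative only: the
   d_nnz/o_nnz arrays are the ones listed above, comm is an MPI communicator with
   three processes, rank is assumed to hold the MPI rank, and error checking is omitted):
.vb
     if (rank == 0)      MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz0,0,o_nnz0,&A);
     else if (rank == 1) MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz1,0,o_nnz1,&A);
     else                MatCreateMPIAIJ(comm,2,2,8,8,0,d_nnz2,0,o_nnz2,&A);
.ve
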
3746    Level: intermediate
3747 
3748 .keywords: matrix, aij, compressed row, sparse, parallel
3749 
3750 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3751           MPIAIJ, MatCreateMPIAIJWithArrays()
3752 @*/
3753 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3754 {
3755   PetscErrorCode ierr;
3756   PetscMPIInt    size;
3757 
3758   PetscFunctionBegin;
3759   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3760   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3761   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3762   if (size > 1) {
3763     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3764     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3765   } else {
3766     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3767     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3768   }
3769   PetscFunctionReturn(0);
3770 }
3771 
3772 #undef __FUNCT__
3773 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3774 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3775 {
3776   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3777 
3778   PetscFunctionBegin;
3779   *Ad     = a->A;
3780   *Ao     = a->B;
3781   *colmap = a->garray;
3782   PetscFunctionReturn(0);
3783 }
3784 
3785 #undef __FUNCT__
3786 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3787 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3788 {
3789   PetscErrorCode ierr;
3790   PetscInt       i;
3791   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3792 
3793   PetscFunctionBegin;
3794   if (coloring->ctype == IS_COLORING_GLOBAL) {
3795     ISColoringValue *allcolors,*colors;
3796     ISColoring      ocoloring;
3797 
3798     /* set coloring for diagonal portion */
3799     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3800 
3801     /* set coloring for off-diagonal portion */
3802     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3803     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3804     for (i=0; i<a->B->cmap->n; i++) {
3805       colors[i] = allcolors[a->garray[i]];
3806     }
3807     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3808     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3809     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3810     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3811   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3812     ISColoringValue *colors;
3813     PetscInt        *larray;
3814     ISColoring      ocoloring;
3815 
3816     /* set coloring for diagonal portion */
3817     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3818     for (i=0; i<a->A->cmap->n; i++) {
3819       larray[i] = i + A->cmap->rstart;
3820     }
3821     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3822     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3823     for (i=0; i<a->A->cmap->n; i++) {
3824       colors[i] = coloring->colors[larray[i]];
3825     }
3826     ierr = PetscFree(larray);CHKERRQ(ierr);
3827     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3828     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3829     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3830 
3831     /* set coloring for off-diagonal portion */
3832     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3833     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3834     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3835     for (i=0; i<a->B->cmap->n; i++) {
3836       colors[i] = coloring->colors[larray[i]];
3837     }
3838     ierr = PetscFree(larray);CHKERRQ(ierr);
3839     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3840     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3841     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3842   } else {
3843     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3844   }
3845 
3846   PetscFunctionReturn(0);
3847 }
3848 
3849 #if defined(PETSC_HAVE_ADIC)
3850 #undef __FUNCT__
3851 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3852 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3853 {
3854   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3855   PetscErrorCode ierr;
3856 
3857   PetscFunctionBegin;
3858   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3859   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3860   PetscFunctionReturn(0);
3861 }
3862 #endif
3863 
3864 #undef __FUNCT__
3865 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3866 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3867 {
3868   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3869   PetscErrorCode ierr;
3870 
3871   PetscFunctionBegin;
3872   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3873   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3874   PetscFunctionReturn(0);
3875 }
3876 
3877 #undef __FUNCT__
3878 #define __FUNCT__ "MatMerge"
3879 /*@
3880       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3881                  matrices from each processor
3882 
3883     Collective on MPI_Comm
3884 
3885    Input Parameters:
3886 +    comm - the communicator the parallel matrix will live on
3887 .    inmat - the input sequential matrices
3888 .    n - number of local columns (or PETSC_DECIDE)
3889 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3890 
3891    Output Parameter:
3892 .    outmat - the parallel matrix generated
3893 
3894     Level: advanced
3895 
3896    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3897 
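   An illustrative calling sequence (a sketch only: Aloc is assumed to be an assembled
   SeqAIJ matrix on each process with the same number of columns everywhere, and error
   checking is omitted; note that the input matrix is destroyed by this routine):
.vb
     Mat C;
     MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
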
3898 @*/
3899 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3900 {
3901   PetscErrorCode ierr;
3902   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3903   PetscInt       *indx;
3904   PetscScalar    *values;
3905 
3906   PetscFunctionBegin;
3907   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3908   if (scall == MAT_INITIAL_MATRIX){
3909     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3910     if (n == PETSC_DECIDE){
3911       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3912     }
3913     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3914     rstart -= m;
3915 
3916     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3917     for (i=0;i<m;i++) {
3918       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3919       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3920       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3921     }
3922     /* This routine will ONLY return MPIAIJ type matrix */
3923     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3924     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3925     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3926     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3927     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3928 
3929   } else if (scall == MAT_REUSE_MATRIX){
3930     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3931   } else {
3932     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3933   }
3934 
3935   for (i=0;i<m;i++) {
3936     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3937     Ii    = i + rstart;
3938     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3939     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3940   }
3941   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3942   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3943   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3944 
3945   PetscFunctionReturn(0);
3946 }
3947 
3948 #undef __FUNCT__
3949 #define __FUNCT__ "MatFileSplit"
3950 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3951 {
3952   PetscErrorCode    ierr;
3953   PetscMPIInt       rank;
3954   PetscInt          m,N,i,rstart,nnz;
3955   size_t            len;
3956   const PetscInt    *indx;
3957   PetscViewer       out;
3958   char              *name;
3959   Mat               B;
3960   const PetscScalar *values;
3961 
3962   PetscFunctionBegin;
3963   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3964   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3965   /* Should this be the type of the diagonal block of A? */
3966   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3967   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3968   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3969   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3970   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3971   for (i=0;i<m;i++) {
3972     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3973     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3974     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3975   }
3976   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3977   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3978 
3979   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3980   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3981   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3982   sprintf(name,"%s.%d",outfile,rank);
3983   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3984   ierr = PetscFree(name);CHKERRQ(ierr);
3985   ierr = MatView(B,out);CHKERRQ(ierr);
3986   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3987   ierr = MatDestroy(B);CHKERRQ(ierr);
3988   PetscFunctionReturn(0);
3989 }
3990 
3991 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3992 #undef __FUNCT__
3993 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3994 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3995 {
3996   PetscErrorCode       ierr;
3997   Mat_Merge_SeqsToMPI  *merge;
3998   PetscContainer       container;
3999 
4000   PetscFunctionBegin;
4001   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4002   if (container) {
4003     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4004     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4005     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4006     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4007     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4008     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4009     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4010     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4011     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4012     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4013     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4014     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4015     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4016     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4017 
4018     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4019     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4020     ierr = PetscFree(merge);CHKERRQ(ierr);
4021   }
4022 
4023   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4024   PetscFunctionReturn(0);
4025 }
4026 
4027 #include "../src/mat/utils/freespace.h"
4028 #include "petscbt.h"
4029 
4030 #undef __FUNCT__
4031 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4032 /*@C
4033       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by summing the sequential
4034                  matrices contributed by each processor
4035 
4036     Collective on MPI_Comm
4037 
4038    Input Parameters:
4039 +    comm - the communicator the parallel matrix will live on
4040 .    seqmat - the input sequential matrix on this processor
4041 .    m - number of local rows (or PETSC_DECIDE)
4042 .    n - number of local columns (or PETSC_DECIDE)
4043 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4044 
4045    Output Parameter:
4046 .    mpimat - the parallel matrix generated
4047 
4048     Level: advanced
4049 
4050    Notes:
4051      The dimensions of the sequential matrix on each processor MUST be the same.
4052      The input seqmat is kept in the container "MatMergeSeqsToMPI" composed on mpimat and will be
4053      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat; a usage sketch follows this manual page.
4054 @*/
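/*
   Usage sketch (editorial addition; the names seqloc, Cmpi, M, N and nz are hypothetical):
   each process assembles a SeqAIJ matrix of identical global dimensions and the
   contributions are summed into one parallel MPIAIJ matrix.

     Mat seqloc,Cmpi;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,M,N,nz,PETSC_NULL,&seqloc);CHKERRQ(ierr);
     ... insert and assemble this process's contribution in seqloc ...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqloc,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Cmpi);CHKERRQ(ierr);
     ... use Cmpi; MatDestroy(Cmpi) also releases the merge support data ...
*/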
4055 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4056 {
4057   PetscErrorCode       ierr;
4058   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4059   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4060   PetscMPIInt          size,rank,taga,*len_s;
4061   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4062   PetscInt             proc,m;
4063   PetscInt             **buf_ri,**buf_rj;
4064   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4065   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4066   MPI_Request          *s_waits,*r_waits;
4067   MPI_Status           *status;
4068   MatScalar            *aa=a->a;
4069   MatScalar            **abuf_r,*ba_i;
4070   Mat_Merge_SeqsToMPI  *merge;
4071   PetscContainer       container;
4072 
4073   PetscFunctionBegin;
4074   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4075 
4076   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4077   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4078 
4079   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4080   if (container) {
4081     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4082   } else {
         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have the MatMergeSeqsToMPI container; it must be created with MatMerge_SeqsToMPISymbolic()");
       }
4083   bi     = merge->bi;
4084   bj     = merge->bj;
4085   buf_ri = merge->buf_ri;
4086   buf_rj = merge->buf_rj;
4087 
4088   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4089   owners = merge->rowmap->range;
4090   len_s  = merge->len_s;
4091 
4092   /* send and recv matrix values */
4093   /*-----------------------------*/
4094   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4095   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4096 
4097   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4098   for (proc=0,k=0; proc<size; proc++){
4099     if (!len_s[proc]) continue;
4100     i = owners[proc];
4101     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4102     k++;
4103   }
4104 
4105   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4106   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4107   ierr = PetscFree(status);CHKERRQ(ierr);
4108 
4109   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4110   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4111 
4112   /* insert mat values of mpimat */
4113   /*----------------------------*/
4114   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4115   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4116 
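  /* Layout of each received i-structure buf_ri[k] (matching the format packed in
     MatMerge_SeqsToMPISymbolic()):
       buf_ri[k][0]                  - number of rows contributed by the sender
       buf_ri[k][1..nrows]           - local indices of those rows
       buf_ri[k][nrows+1..2*nrows+1] - running offsets into the corresponding j/a buffers */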
4117   for (k=0; k<merge->nrecv; k++){
4118     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4119     nrows = *(buf_ri_k[k]);
4120     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4121     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points into the i-structure (offsets) of the k-th received message */
4122   }
4123 
4124   /* set values of ba */
4125   m = merge->rowmap->n;
4126   for (i=0; i<m; i++) {
4127     arow = owners[rank] + i;
4128     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4129     bnzi = bi[i+1] - bi[i];
4130     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4131 
4132     /* add local non-zero vals of this proc's seqmat into ba */
4133     anzi = ai[arow+1] - ai[arow];
4134     aj   = a->j + ai[arow];
4135     aa   = a->a + ai[arow];
4136     nextaj = 0;
4137     for (j=0; nextaj<anzi; j++){
4138       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4139         ba_i[j] += aa[nextaj++];
4140       }
4141     }
4142 
4143     /* add received vals into ba */
4144     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4145       /* i-th row */
4146       if (i == *nextrow[k]) {
4147         anzi = *(nextai[k]+1) - *nextai[k];
4148         aj   = buf_rj[k] + *(nextai[k]);
4149         aa   = abuf_r[k] + *(nextai[k]);
4150         nextaj = 0;
4151         for (j=0; nextaj<anzi; j++){
4152           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4153             ba_i[j] += aa[nextaj++];
4154           }
4155         }
4156         nextrow[k]++; nextai[k]++;
4157       }
4158     }
4159     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4160   }
4161   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4162   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4163 
4164   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4165   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4166   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4167   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4168   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4169   PetscFunctionReturn(0);
4170 }
4171 
4172 #undef __FUNCT__
4173 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4174 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4175 {
4176   PetscErrorCode       ierr;
4177   Mat                  B_mpi;
4178   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4179   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4180   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4181   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4182   PetscInt             len,proc,*dnz,*onz;
4183   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4184   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4185   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4186   MPI_Status           *status;
4187   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4188   PetscBT              lnkbt;
4189   Mat_Merge_SeqsToMPI  *merge;
4190   PetscContainer       container;
4191 
4192   PetscFunctionBegin;
4193   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4194 
4195   /* make sure it is a PETSc comm */
4196   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4198   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4199 
4200   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4201   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4202 
4203   /* determine row ownership */
4204   /*---------------------------------------------------------*/
4205   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4206   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4207   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4208   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4209   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4210   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4211   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4212 
4213   m      = merge->rowmap->n;
4214   M      = merge->rowmap->N;
4215   owners = merge->rowmap->range;
4216 
4217   /* determine the number of messages to send, their lengths */
4218   /*---------------------------------------------------------*/
4219   len_s  = merge->len_s;
4220 
4221   len = 0;  /* length of buf_si[] */
4222   merge->nsend = 0;
4223   for (proc=0; proc<size; proc++){
4224     len_si[proc] = 0;
4225     if (proc == rank){
4226       len_s[proc] = 0;
4227     } else {
4228       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4229       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4230     }
4231     if (len_s[proc]) {
4232       merge->nsend++;
4233       nrows = 0;
4234       for (i=owners[proc]; i<owners[proc+1]; i++){
4235         if (ai[i+1] > ai[i]) nrows++;
4236       }
4237       len_si[proc] = 2*(nrows+1);
4238       len += len_si[proc];
4239     }
4240   }
4241 
4242   /* determine the number and length of messages to receive for ij-structure */
4243   /*-------------------------------------------------------------------------*/
4244   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4245   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4246 
4247   /* post the Irecv of j-structure */
4248   /*-------------------------------*/
4249   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4250   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4251 
4252   /* post the Isend of j-structure */
4253   /*--------------------------------*/
4254   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4255 
4256   for (proc=0, k=0; proc<size; proc++){
4257     if (!len_s[proc]) continue;
4258     i = owners[proc];
4259     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4260     k++;
4261   }
4262 
4263   /* receives and sends of j-structure are complete */
4264   /*------------------------------------------------*/
4265   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4266   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4267 
4268   /* send and recv i-structure */
4269   /*---------------------------*/
4270   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4271   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4272 
4273   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4274   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4275   for (proc=0,k=0; proc<size; proc++){
4276     if (!len_s[proc]) continue;
4277     /* form outgoing message for i-structure:
4278          buf_si[0]:                 nrows to be sent
4279                [1:nrows]:           row index (local to the receiving process)
4280                [nrows+1:2*nrows+1]: i-structure index
4281     */
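    /* For illustration (hypothetical values): if two nonempty rows with local indices 3 and 7,
       holding 2 and 5 nonzeros respectively, are sent, then len_si[proc] = 6 and
         buf_si = {2, 3, 7, 0, 2, 7}   (nrows, the row indices, then the running nonzero offsets) */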
4282     /*-------------------------------------------*/
4283     nrows = len_si[proc]/2 - 1;
4284     buf_si_i    = buf_si + nrows+1;
4285     buf_si[0]   = nrows;
4286     buf_si_i[0] = 0;
4287     nrows = 0;
4288     for (i=owners[proc]; i<owners[proc+1]; i++){
4289       anzi = ai[i+1] - ai[i];
4290       if (anzi) {
4291         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4292         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4293         nrows++;
4294       }
4295     }
4296     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4297     k++;
4298     buf_si += len_si[proc];
4299   }
4300 
4301   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4302   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4303 
4304   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4305   for (i=0; i<merge->nrecv; i++){
4306     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4307   }
4308 
4309   ierr = PetscFree(len_si);CHKERRQ(ierr);
4310   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4311   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4312   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4313   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4314   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4315   ierr = PetscFree(status);CHKERRQ(ierr);
4316 
4317   /* compute a local seq matrix in each processor */
4318   /*----------------------------------------------*/
4319   /* allocate bi array and free space for accumulating nonzero column info */
4320   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4321   bi[0] = 0;
4322 
4323   /* create and initialize a linked list */
4324   nlnk = N+1;
4325   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4326 
4327   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4329   len  = ai[owners[rank+1]] - ai[owners[rank]];
4330   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4331   current_space = free_space;
4332 
4333   /* determine symbolic info for each local row */
4334   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4335 
4336   for (k=0; k<merge->nrecv; k++){
4337     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4338     nrows = *buf_ri_k[k];
4339     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4340     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points into the i-structure (offsets) of the k-th received message */
4341   }
4342 
4343   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4344   len = 0;
4345   for (i=0;i<m;i++) {
4346     bnzi   = 0;
4347     /* add local non-zero cols of this proc's seqmat into lnk */
4348     arow   = owners[rank] + i;
4349     anzi   = ai[arow+1] - ai[arow];
4350     aj     = a->j + ai[arow];
4351     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4352     bnzi += nlnk;
4353     /* add received col data into lnk */
4354     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4355       if (i == *nextrow[k]) { /* i-th row */
4356         anzi = *(nextai[k]+1) - *nextai[k];
4357         aj   = buf_rj[k] + *nextai[k];
4358         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4359         bnzi += nlnk;
4360         nextrow[k]++; nextai[k]++;
4361       }
4362     }
4363     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4364 
4365     /* if free space is not available, make more free space */
4366     if (current_space->local_remaining<bnzi) {
4367       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4368       nspacedouble++;
4369     }
4370     /* copy data into free space, then initialize lnk */
4371     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4372     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4373 
4374     current_space->array           += bnzi;
4375     current_space->local_used      += bnzi;
4376     current_space->local_remaining -= bnzi;
4377 
4378     bi[i+1] = bi[i] + bnzi;
4379   }
4380 
4381   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4382 
4383   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4384   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4385   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4386 
4387   /* create symbolic parallel matrix B_mpi */
4388   /*---------------------------------------*/
4389   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4390   if (n==PETSC_DECIDE) {
4391     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4392   } else {
4393     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4394   }
4395   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4396   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4397   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4398 
4399   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4400   B_mpi->assembled     = PETSC_FALSE;
4401   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4402   merge->bi            = bi;
4403   merge->bj            = bj;
4404   merge->buf_ri        = buf_ri;
4405   merge->buf_rj        = buf_rj;
4406   merge->coi           = PETSC_NULL;
4407   merge->coj           = PETSC_NULL;
4408   merge->owners_co     = PETSC_NULL;
4409 
4410   /* attach the supporting struct to B_mpi for reuse */
4411   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4412   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4413   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4414   *mpimat = B_mpi;
4415 
4416   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4417   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4418   PetscFunctionReturn(0);
4419 }
4420 
4421 #undef __FUNCT__
4422 #define __FUNCT__ "MatMerge_SeqsToMPI"
4423 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4424 {
4425   PetscErrorCode   ierr;
4426 
4427   PetscFunctionBegin;
4428   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4429   if (scall == MAT_INITIAL_MATRIX){
4430     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4431   }
4432   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4433   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4434   PetscFunctionReturn(0);
4435 }
4436 
4437 #undef __FUNCT__
4438 #define __FUNCT__ "MatGetLocalMat"
4439 /*@
4440      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of a parallel matrix
4441 
4442     Not Collective
4443 
4444    Input Parameters:
4445 +    A - the matrix
4446 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4447 
4448    Output Parameter:
4449 .    A_loc - the local sequential matrix generated
4450 
4451     Level: developer
4452 
4453 @*/
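/*
   Usage sketch (editorial addition; Amat is an assumed, already assembled MPIAIJ matrix):

     Mat A_loc;
     ierr = MatGetLocalMat(Amat,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... work with the local rows of Amat through A_loc ...
     ierr = MatGetLocalMat(Amat,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);   (refresh the values after Amat changes)
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/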
4454 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4455 {
4456   PetscErrorCode  ierr;
4457   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4458   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4459   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4460   MatScalar       *aa=a->a,*ba=b->a,*cam;
4461   PetscScalar     *ca;
4462   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4463   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4464 
4465   PetscFunctionBegin;
4466   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4467   if (scall == MAT_INITIAL_MATRIX){
4468     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4469     ci[0] = 0;
4470     for (i=0; i<am; i++){
4471       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4472     }
4473     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4474     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4475     k = 0;
4476     for (i=0; i<am; i++) {
4477       ncols_o = bi[i+1] - bi[i];
4478       ncols_d = ai[i+1] - ai[i];
4479       /* off-diagonal portion of A */
4480       for (jo=0; jo<ncols_o; jo++) {
4481         col = cmap[*bj];
4482         if (col >= cstart) break;
4483         cj[k]   = col; bj++;
4484         ca[k++] = *ba++;
4485       }
4486       /* diagonal portion of A */
4487       for (j=0; j<ncols_d; j++) {
4488         cj[k]   = cstart + *aj++;
4489         ca[k++] = *aa++;
4490       }
4491       /* off-diagonal portion of A */
4492       for (j=jo; j<ncols_o; j++) {
4493         cj[k]   = cmap[*bj++];
4494         ca[k++] = *ba++;
4495       }
4496     }
4497     /* put together the new matrix */
4498     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4499     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4500     /* Since these are PETSc arrays, change flags to free them as necessary. */
4501     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4502     mat->free_a  = PETSC_TRUE;
4503     mat->free_ij = PETSC_TRUE;
4504     mat->nonew   = 0;
4505   } else if (scall == MAT_REUSE_MATRIX){
4506     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4507     ci = mat->i; cj = mat->j; cam = mat->a;
4508     for (i=0; i<am; i++) {
4509       /* off-diagonal portion of A */
4510       ncols_o = bi[i+1] - bi[i];
4511       for (jo=0; jo<ncols_o; jo++) {
4512         col = cmap[*bj];
4513         if (col >= cstart) break;
4514         *cam++ = *ba++; bj++;
4515       }
4516       /* diagonal portion of A */
4517       ncols_d = ai[i+1] - ai[i];
4518       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4519       /* off-diagonal portion of A */
4520       for (j=jo; j<ncols_o; j++) {
4521         *cam++ = *ba++; bj++;
4522       }
4523     }
4524   } else {
4525     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4526   }
4527 
4528   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4529   PetscFunctionReturn(0);
4530 }
4531 
4532 #undef __FUNCT__
4533 #define __FUNCT__ "MatGetLocalMatCondensed"
4534 /*@C
4535      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and NON-ZERO columns of a parallel matrix
4536 
4537     Not Collective
4538 
4539    Input Parameters:
4540 +    A - the matrix
4541 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4542 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4543 
4544    Output Parameter:
4545 .    A_loc - the local sequential matrix generated
4546 
4547     Level: developer
4548 
4549 @*/
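/*
   Usage sketch (editorial addition; Amat is an assumed, already assembled MPIAIJ matrix):
   passing PETSC_NULL for row and col lets the routine choose the local rows and the
   nonzero columns itself.

     Mat A_cond;
     ierr = MatGetLocalMatCondensed(Amat,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_cond);CHKERRQ(ierr);
     ... use A_cond ...
     ierr = MatDestroy(A_cond);CHKERRQ(ierr);
*/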
4550 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4551 {
4552   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4553   PetscErrorCode    ierr;
4554   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4555   IS                isrowa,iscola;
4556   Mat               *aloc;
4557 
4558   PetscFunctionBegin;
4559   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4560   if (!row){
4561     start = A->rmap->rstart; end = A->rmap->rend;
4562     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4563   } else {
4564     isrowa = *row;
4565   }
4566   if (!col){
4567     start = A->cmap->rstart;
4568     cmap  = a->garray;
4569     nzA   = a->A->cmap->n;
4570     nzB   = a->B->cmap->n;
4571     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4572     ncols = 0;
4573     for (i=0; i<nzB; i++) {
4574       if (cmap[i] < start) idx[ncols++] = cmap[i];
4575       else break;
4576     }
4577     imark = i;
4578     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4579     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4580     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4581     ierr = PetscFree(idx);CHKERRQ(ierr);
4582   } else {
4583     iscola = *col;
4584   }
4585   if (scall != MAT_INITIAL_MATRIX){
4586     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4587     aloc[0] = *A_loc;
4588   }
4589   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4590   *A_loc = aloc[0];
4591   ierr = PetscFree(aloc);CHKERRQ(ierr);
4592   if (!row){
4593     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4594   }
4595   if (!col){
4596     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4597   }
4598   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4599   PetscFunctionReturn(0);
4600 }
4601 
4602 #undef __FUNCT__
4603 #define __FUNCT__ "MatGetBrowsOfAcols"
4604 /*@C
4605     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of local A
4606 
4607     Collective on Mat
4608 
4609    Input Parameters:
4610 +    A,B - the matrices in mpiaij format
4611 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4612 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4613 
4614    Output Parameter:
4615 +    rowb, colb - index sets of rows and columns of B to extract
4616 .    brstart - row index of B_seq at which the next B->rmap->n rows correspond to B's local rows
4617 -    B_seq - the sequential matrix generated
4618 
4619     Level: developer
4620 
4621 @*/
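/*
   Usage sketch (editorial addition): A and B are assumed to be assembled MPIAIJ matrices whose
   layouts satisfy the compatibility check performed below.

     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... use B_seq; rowb and colb can be passed back in with MAT_REUSE_MATRIX ...
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/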
4622 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4623 {
4624   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4625   PetscErrorCode    ierr;
4626   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4627   IS                isrowb,iscolb;
4628   Mat               *bseq;
4629 
4630   PetscFunctionBegin;
4631   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4632     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4633   }
4634   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4635 
4636   if (scall == MAT_INITIAL_MATRIX){
4637     start = A->cmap->rstart;
4638     cmap  = a->garray;
4639     nzA   = a->A->cmap->n;
4640     nzB   = a->B->cmap->n;
4641     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4642     ncols = 0;
4643     for (i=0; i<nzB; i++) {  /* row < local row index */
4644       if (cmap[i] < start) idx[ncols++] = cmap[i];
4645       else break;
4646     }
4647     imark = i;
4648     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4649     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4650     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4651     ierr = PetscFree(idx);CHKERRQ(ierr);
4652     *brstart = imark;
4653     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4654   } else {
4655     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4656     isrowb = *rowb; iscolb = *colb;
4657     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4658     bseq[0] = *B_seq;
4659   }
4660   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4661   *B_seq = bseq[0];
4662   ierr = PetscFree(bseq);CHKERRQ(ierr);
4663   if (!rowb){
4664     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4665   } else {
4666     *rowb = isrowb;
4667   }
4668   if (!colb){
4669     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4670   } else {
4671     *colb = iscolb;
4672   }
4673   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4674   PetscFunctionReturn(0);
4675 }
4676 
4677 #undef __FUNCT__
4678 #define __FUNCT__ "MatGetBrowsOfAoCols"
4679 /*@C
4680     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4681     of the OFF-DIAGONAL portion of local A
4682 
4683     Collective on Mat
4684 
4685    Input Parameters:
4686 +    A,B - the matrices in mpiaij format
4687 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4688 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4689 .    startsj_r - similar to startsj for receives
4690 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4691 
4692    Output Parameter:
4693 .    B_oth - the sequential matrix generated
4694 
4695     Level: developer
4696 
4697 @*/
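/*
   Usage sketch (editorial addition): A and B are assumed to be assembled MPIAIJ matrices with
   compatible layouts.  Passing PETSC_NULL for startsj, startsj_r and bufa_ptr lets the routine
   manage its communication buffers internally.

     Mat B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,PETSC_NULL,&B_oth);CHKERRQ(ierr);
     ... use B_oth ...
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/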
4698 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4699 {
4700   VecScatter_MPI_General *gen_to,*gen_from;
4701   PetscErrorCode         ierr;
4702   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4703   Mat_SeqAIJ             *b_oth;
4704   VecScatter             ctx=a->Mvctx;
4705   MPI_Comm               comm=((PetscObject)ctx)->comm;
4706   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4707   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4708   PetscScalar            *rvalues,*svalues;
4709   MatScalar              *b_otha,*bufa,*bufA;
4710   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4711   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4712   MPI_Status             *sstatus,rstatus;
4713   PetscMPIInt            jj;
4714   PetscInt               *cols,sbs,rbs;
4715   PetscScalar            *vals;
4716 
4717   PetscFunctionBegin;
4718   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4719     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4720   }
4721   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4722   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4723 
4724   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4725   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4726   rvalues  = gen_from->values; /* buffer that will hold the lengths of the rows being received */
4727   svalues  = gen_to->values;   /* buffer that holds the lengths of the rows being sent */
4728   nrecvs   = gen_from->n;
4729   nsends   = gen_to->n;
4730 
4731   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4732   srow     = gen_to->indices;   /* local row index to be sent */
4733   sstarts  = gen_to->starts;
4734   sprocs   = gen_to->procs;
4735   sstatus  = gen_to->sstatus;
4736   sbs      = gen_to->bs;
4737   rstarts  = gen_from->starts;
4738   rprocs   = gen_from->procs;
4739   rbs      = gen_from->bs;
4740 
4741   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4742   if (scall == MAT_INITIAL_MATRIX){
4743     /* i-array */
4744     /*---------*/
4745     /*  post receives */
4746     for (i=0; i<nrecvs; i++){
4747       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4748       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4749       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4750     }
4751 
4752     /* pack the outgoing message */
4753     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4754     sstartsj[0] = 0;  rstartsj[0] = 0;
4755     len = 0; /* total length of j or a array to be sent */
4756     k = 0;
4757     for (i=0; i<nsends; i++){
4758       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4759       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4760       for (j=0; j<nrows; j++) {
4761         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4762         for (l=0; l<sbs; l++){
4763           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4764           rowlen[j*sbs+l] = ncols;
4765           len += ncols;
4766           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4767         }
4768         k++;
4769       }
4770       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4771       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4772     }
4773     /* recvs and sends of i-array are completed */
4774     i = nrecvs;
4775     while (i--) {
4776       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4777     }
4778     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4779 
4780     /* allocate buffers for sending j and a arrays */
4781     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4782     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4783 
4784     /* create i-array of B_oth */
4785     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4786     b_othi[0] = 0;
4787     len = 0; /* total length of j or a array to be received */
4788     k = 0;
4789     for (i=0; i<nrecvs; i++){
4790       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4791       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4792       for (j=0; j<nrows; j++) {
4793         b_othi[k+1] = b_othi[k] + rowlen[j];
4794         len += rowlen[j]; k++;
4795       }
4796       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4797     }
4798 
4799     /* allocate space for j and a arrays of B_oth */
4800     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4801     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4802 
4803     /* j-array */
4804     /*---------*/
4805     /*  post receives of j-array */
4806     for (i=0; i<nrecvs; i++){
4807       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4808       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4809     }
4810 
4811     /* pack the outgoing message j-array */
4812     k = 0;
4813     for (i=0; i<nsends; i++){
4814       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4815       bufJ = bufj+sstartsj[i];
4816       for (j=0; j<nrows; j++) {
4817         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4818         for (ll=0; ll<sbs; ll++){
4819           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4820           for (l=0; l<ncols; l++){
4821             *bufJ++ = cols[l];
4822           }
4823           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4824         }
4825       }
4826       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4827     }
4828 
4829     /* recvs and sends of j-array are completed */
4830     i = nrecvs;
4831     while (i--) {
4832       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4833     }
4834     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4835   } else if (scall == MAT_REUSE_MATRIX){
4836     sstartsj = *startsj;
4837     rstartsj = *startsj_r;
4838     bufa     = *bufa_ptr;
4839     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4840     b_otha   = b_oth->a;
4841   } else {
4842     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4843   }
4844 
4845   /* a-array */
4846   /*---------*/
4847   /*  post receives of a-array */
4848   for (i=0; i<nrecvs; i++){
4849     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4850     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4851   }
4852 
4853   /* pack the outgoing message a-array */
4854   k = 0;
4855   for (i=0; i<nsends; i++){
4856     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4857     bufA = bufa+sstartsj[i];
4858     for (j=0; j<nrows; j++) {
4859       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4860       for (ll=0; ll<sbs; ll++){
4861         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4862         for (l=0; l<ncols; l++){
4863           *bufA++ = vals[l];
4864         }
4865         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4866       }
4867     }
4868     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4869   }
4870   /* recvs and sends of a-array are completed */
4871   i = nrecvs;
4872   while (i--) {
4873     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4874   }
4875   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4876   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4877 
4878   if (scall == MAT_INITIAL_MATRIX){
4879     /* put together the new matrix */
4880     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4881 
4882     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4883     /* Since these are PETSc arrays, change flags to free them as necessary. */
4884     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4885     b_oth->free_a  = PETSC_TRUE;
4886     b_oth->free_ij = PETSC_TRUE;
4887     b_oth->nonew   = 0;
4888 
4889     ierr = PetscFree(bufj);CHKERRQ(ierr);
4890     if (!startsj || !bufa_ptr){
4891       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4892       ierr = PetscFree(bufa);CHKERRQ(ierr);
4893     } else {
4894       *startsj   = sstartsj;
4895       *startsj_r = rstartsj;
4896       *bufa_ptr  = bufa;
4897     }
4898   }
4899   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4900   PetscFunctionReturn(0);
4901 }
4902 
4903 #undef __FUNCT__
4904 #define __FUNCT__ "MatGetCommunicationStructs"
4905 /*@C
4906   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4907 
4908   Not Collective
4909 
4910   Input Parameter:
4911 . A - The matrix in mpiaij format
4912 
4913   Output Parameters:
4914 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4915 . colmap - A map from global column index to local index into lvec
4916 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4917 
4918   Level: developer
4919 
4920 @*/
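/*
   Usage sketch (editorial addition; Amat is an assumed, already assembled MPIAIJ matrix).
   The type of the colmap argument depends on whether PETSc was configured with ctable support;
   the returned objects remain owned by Amat and must not be destroyed by the caller.

     Vec        lvec;
     VecScatter sc;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(Amat,&lvec,&colmap,&sc);CHKERRQ(ierr);
*/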
4921 #if defined (PETSC_USE_CTABLE)
4922 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4923 #else
4924 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4925 #endif
4926 {
4927   Mat_MPIAIJ *a;
4928 
4929   PetscFunctionBegin;
4930   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4931   PetscValidPointer(lvec, 2);
4932   PetscValidPointer(colmap, 3);
4933   PetscValidPointer(multScatter, 4);
4934   a = (Mat_MPIAIJ *) A->data;
4935   if (lvec) *lvec = a->lvec;
4936   if (colmap) *colmap = a->colmap;
4937   if (multScatter) *multScatter = a->Mvctx;
4938   PetscFunctionReturn(0);
4939 }
4940 
4941 EXTERN_C_BEGIN
4942 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,const MatType,MatReuse,Mat*);
4943 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,const MatType,MatReuse,Mat*);
4944 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
4945 EXTERN_C_END
4946 
4947 #undef __FUNCT__
4948 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
4949 /*
4950     Computes C = (B'*A')', i.e. C = A*B, since computing A*B directly with a parallel dense A is untenable
4951 
4952                n                       p                          p
4953         (              )       (              )         (                  )
4954       m (      A       )  *  n (       B      )   =   m (         C        )
4955         (              )       (              )         (                  )
4956 
4957 */
4958 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
4959 {
4960   PetscErrorCode     ierr;
4961   Mat                At,Bt,Ct;
4962 
4963   PetscFunctionBegin;
4964   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
4965   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
4966   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
4967   ierr = MatDestroy(At);CHKERRQ(ierr);
4968   ierr = MatDestroy(Bt);CHKERRQ(ierr);
4969   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
4970   ierr = MatDestroy(Ct);CHKERRQ(ierr);
4971   PetscFunctionReturn(0);
4972 }
4973 
4974 #undef __FUNCT__
4975 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
4976 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
4977 {
4978   PetscErrorCode ierr;
4979   PetscInt       m=A->rmap->n,n=B->cmap->n;
4980   Mat            Cmat;
4981 
4982   PetscFunctionBegin;
4983   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
4984   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
4985   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4986   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
4987   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
4988   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4989   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4990   *C   = Cmat;
4991   PetscFunctionReturn(0);
4992 }
4993 
4994 /* ----------------------------------------------------------------*/
4995 #undef __FUNCT__
4996 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
4997 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
4998 {
4999   PetscErrorCode ierr;
5000 
5001   PetscFunctionBegin;
5002   if (scall == MAT_INITIAL_MATRIX){
5003     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5004   }
5005   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5006   PetscFunctionReturn(0);
5007 }
5008 
5009 EXTERN_C_BEGIN
5010 #if defined(PETSC_HAVE_MUMPS)
5011 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5012 #endif
5013 #if defined(PETSC_HAVE_PASTIX)
5014 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5015 #endif
5016 #if defined(PETSC_HAVE_SUPERLU_DIST)
5017 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5018 #endif
5019 #if defined(PETSC_HAVE_SPOOLES)
5020 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5021 #endif
5022 EXTERN_C_END
5023 
5024 /*MC
5025    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5026 
5027    Options Database Keys:
5028 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5029 
5030   Level: beginner
5031 
5032 .seealso: MatCreateMPIAIJ()
5033 M*/
5034 
5035 EXTERN_C_BEGIN
5036 #undef __FUNCT__
5037 #define __FUNCT__ "MatCreate_MPIAIJ"
5038 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5039 {
5040   Mat_MPIAIJ     *b;
5041   PetscErrorCode ierr;
5042   PetscMPIInt    size;
5043 
5044   PetscFunctionBegin;
5045   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5046 
5047   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5048   B->data         = (void*)b;
5049   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5050   B->rmap->bs     = 1;
5051   B->assembled    = PETSC_FALSE;
5052   B->mapping      = 0;
5053 
5054   B->insertmode   = NOT_SET_VALUES;
5055   b->size         = size;
5056   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5057 
5058   /* build cache for off array entries formed */
5059   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5060   b->donotstash  = PETSC_FALSE;
5061   b->colmap      = 0;
5062   b->garray      = 0;
5063   b->roworiented = PETSC_TRUE;
5064 
5065   /* stuff used for matrix vector multiply */
5066   b->lvec      = PETSC_NULL;
5067   b->Mvctx     = PETSC_NULL;
5068 
5069   /* stuff for MatGetRow() */
5070   b->rowindices   = 0;
5071   b->rowvalues    = 0;
5072   b->getrowactive = PETSC_FALSE;
5073 
5074 #if defined(PETSC_HAVE_SPOOLES)
5075   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5076                                      "MatGetFactor_mpiaij_spooles",
5077                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5078 #endif
5079 #if defined(PETSC_HAVE_MUMPS)
5080   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5081                                      "MatGetFactor_aij_mumps",
5082                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5083 #endif
5084 #if defined(PETSC_HAVE_PASTIX)
5085   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5086 					   "MatGetFactor_mpiaij_pastix",
5087 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5088 #endif
5089 #if defined(PETSC_HAVE_SUPERLU_DIST)
5090   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5091                                      "MatGetFactor_mpiaij_superlu_dist",
5092                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5093 #endif
5094   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5095                                      "MatStoreValues_MPIAIJ",
5096                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5097   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5098                                      "MatRetrieveValues_MPIAIJ",
5099                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5100   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5101 				     "MatGetDiagonalBlock_MPIAIJ",
5102                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5103   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5104 				     "MatIsTranspose_MPIAIJ",
5105 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5106   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5107 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5108 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5109   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5110 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5111 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5112   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5113 				     "MatDiagonalScaleLocal_MPIAIJ",
5114 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5115   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
5116                                      "MatConvert_MPIAIJ_MPICSRPERM",
5117                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
5118   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
5119                                      "MatConvert_MPIAIJ_MPICRL",
5120                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
5121   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5122                                      "MatConvert_MPIAIJ_MPISBAIJ",
5123                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5124   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5125                                      "MatMatMult_MPIDense_MPIAIJ",
5126                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5127   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5128                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5129                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5130   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5131                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5132                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5133   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5134   PetscFunctionReturn(0);
5135 }
5136 EXTERN_C_END
5137 
5138 #undef __FUNCT__
5139 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5140 /*@
5141      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5142          and "off-diagonal" part of the matrix in CSR format.
5143 
5144    Collective on MPI_Comm
5145 
5146    Input Parameters:
5147 +  comm - MPI communicator
5148 .  m - number of local rows (Cannot be PETSC_DECIDE)
5149 .  n - number of local columns; this should be the same as the local size used in creating the
5150        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5151        calculated if N is given). For square matrices n is almost always m.
5152 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5153 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5154 .   i - row indices for "diagonal" portion of matrix
5155 .   j - column indices
5156 .   a - matrix values
5157 .   oi - row indices for "off-diagonal" portion of matrix
5158 .   oj - column indices
5159 -   oa - matrix values
5160 
5161    Output Parameter:
5162 .   mat - the matrix
5163 
5164    Level: advanced
5165 
5166    Notes:
5167        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5168 
5169        The i and j indices are 0 based
5170 
5171        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5172 
5173        This sets local rows and cannot be used to set off-processor values.
5174 
5175        You cannot later use MatSetValues() to change values in this matrix.
5176 
5177 .keywords: matrix, aij, compressed row, sparse, parallel
5178 
5179 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5180           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5181 @*/
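/*
   Usage sketch (editorial addition): a hypothetical 2x2 matrix [[1,2],[3,4]] split over two
   processes, one row per process.  Here the "diagonal" arrays use local column indices while
   the "off-diagonal" arrays use global column indices, reflecting how the arrays are handed
   to MatCreateSeqAIJWithArrays() inside this routine.

     On rank 0 (owns global row 0 and global column 0):
       PetscInt    i[]  = {0,1}, j[]  = {0};  PetscScalar a[]  = {1.0};
       PetscInt    oi[] = {0,1}, oj[] = {1};  PetscScalar oa[] = {2.0};
     On rank 1 (owns global row 1 and global column 1):
       PetscInt    i[]  = {0,1}, j[]  = {0};  PetscScalar a[]  = {4.0};
       PetscInt    oi[] = {0,1}, oj[] = {0};  PetscScalar oa[] = {3.0};

     Mat C;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&C);CHKERRQ(ierr);
*/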
5182 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5183 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5184 {
5185   PetscErrorCode ierr;
5186   Mat_MPIAIJ     *maij;
5187 
5188   PetscFunctionBegin;
5189   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5190   if (i[0]) {
5191     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5192   }
5193   if (oi[0]) {
5194     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5195   }
5196   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5197   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5198   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5199   maij = (Mat_MPIAIJ*) (*mat)->data;
5200   maij->donotstash     = PETSC_TRUE;
5201   (*mat)->preallocated = PETSC_TRUE;
5202 
5203   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5204   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5205   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5206   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5207 
5208   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5209   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5210 
5211   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5212   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5213   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5214   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5215 
5216   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5217   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5218   PetscFunctionReturn(0);
5219 }
5220 
5221 /*
5222     Special version for direct calls from Fortran
5223 */
5224 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5225 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5226 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5227 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5228 #endif
5229 
5230 /* Change these macros so they can be used in a void function */
5231 #undef CHKERRQ
5232 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5233 #undef SETERRQ2
5234 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5235 #undef SETERRQ
5236 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5237 
5238 EXTERN_C_BEGIN
5239 #undef __FUNCT__
5240 #define __FUNCT__ "matsetvaluesmpiaij_"
5241 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5242 {
5243   Mat             mat = *mmat;
5244   PetscInt        m = *mm, n = *mn;
5245   InsertMode      addv = *maddv;
5246   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5247   PetscScalar     value;
5248   PetscErrorCode  ierr;
5249 
5250   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5251   if (mat->insertmode == NOT_SET_VALUES) {
5252     mat->insertmode = addv;
5253   }
5254 #if defined(PETSC_USE_DEBUG)
5255   else if (mat->insertmode != addv) {
5256     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5257   }
5258 #endif
5259   {
5260   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5261   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5262   PetscTruth      roworiented = aij->roworiented;
5263 
5264   /* Some Variables required in the macro */
5265   Mat             A = aij->A;
5266   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5267   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5268   MatScalar       *aa = a->a;
5269   PetscTruth      ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5270   Mat             B = aij->B;
5271   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5272   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5273   MatScalar       *ba = b->a;
5274 
5275   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5276   PetscInt        nonew = a->nonew;
5277   MatScalar       *ap1,*ap2;
5278 
5279   PetscFunctionBegin;
5280   for (i=0; i<m; i++) {
5281     if (im[i] < 0) continue;
5282 #if defined(PETSC_USE_DEBUG)
5283     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5284 #endif
5285     if (im[i] >= rstart && im[i] < rend) {
5286       row      = im[i] - rstart;
5287       lastcol1 = -1;
5288       rp1      = aj + ai[row];
5289       ap1      = aa + ai[row];
5290       rmax1    = aimax[row];
5291       nrow1    = ailen[row];
5292       low1     = 0;
5293       high1    = nrow1;
5294       lastcol2 = -1;
5295       rp2      = bj + bi[row];
5296       ap2      = ba + bi[row];
5297       rmax2    = bimax[row];
5298       nrow2    = bilen[row];
5299       low2     = 0;
5300       high2    = nrow2;
5301 
5302       for (j=0; j<n; j++) {
5303         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5304         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5305         if (in[j] >= cstart && in[j] < cend){
5306           col = in[j] - cstart;
5307           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5308         } else if (in[j] < 0) continue;
5309 #if defined(PETSC_USE_DEBUG)
5310         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5311 #endif
5312         else {
5313           if (mat->was_assembled) {
5314             if (!aij->colmap) {
5315               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5316             }
5317 #if defined (PETSC_USE_CTABLE)
5318             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5319 	    col--;
5320 #else
5321             col = aij->colmap[in[j]] - 1;
5322 #endif
5323             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5324               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5325               col =  in[j];
5326               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5327               B = aij->B;
5328               b = (Mat_SeqAIJ*)B->data;
5329               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5330               rp2      = bj + bi[row];
5331               ap2      = ba + bi[row];
5332               rmax2    = bimax[row];
5333               nrow2    = bilen[row];
5334               low2     = 0;
5335               high2    = nrow2;
5336               bm       = aij->B->rmap->n;
5337               ba = b->a;
5338             }
5339           } else col = in[j];
5340           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5341         }
5342       }
5343     } else {
5344       if (!aij->donotstash) {
5345         if (roworiented) {
5346           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5347         } else {
5348           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscTruth)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5349         }
5350       }
5351     }
5352   }}
5353   PetscFunctionReturnVoid();
5354 }
5355 EXTERN_C_END
5356 
5357