xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 85ebf7a405d2796ceff80dccd191f91eacafcbce)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
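/*
    Usage sketch (illustrative only; gA, dA and mlocal are placeholder names chosen here,
    not part of the library).  As written, only process 0's copy of gmat is actually read:

       Mat      gA;               sequential AIJ matrix assembled on process 0
       Mat      dA;               distributed result
       PetscInt mlocal = ...;     number of rows this process will own; the sum over all processes must equal the global size of gA

       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,mlocal,MAT_INITIAL_MATRIX,&dA);CHKERRQ(ierr);
       ...
       ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gA,mlocal,MAT_REUSE_MATRIX,&dA);CHKERRQ(ierr);   moves over new numerical values only
*/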
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscBool      aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to the other processes */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal entries in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal entries in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it it is not scalable (each process
181 stores an order N integer array) but is fast to access.
182 */
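/*
  Illustration (uses a hypothetical garray, not taken from any particular run): if this
  process has garray = {3,7,12}, the colmap answers "which local off-diagonal column holds
  global column g?".  The lookup used later in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()
  has the form

      ierr = PetscTableFind(aij->colmap,g+1,&col);CHKERRQ(ierr); col--;      with PETSC_USE_CTABLE
      col  = aij->colmap[g] - 1;                                              without it

  so a result of -1 means global column g does not appear in the off-diagonal part.
*/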
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
205 
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
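/*
    Both macros above follow the same pattern: a coarse binary search narrows the sorted row
    to a handful of candidates, a linear scan then locates the column; an existing entry is
    added to or overwritten, otherwise the later entries of the row are shifted up one slot
    and the new (col,value) pair is inserted, reallocating the row with
    MatSeqXAIJReallocateAIJ() when it is full (or skipping/erroring according to nonew).
*/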
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
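  /* The caller passes the entire row in global column order, so v holds, consecutively:
     the off-diagonal entries to the left of the diagonal block (stored in B), then the
     diagonal-block entries (stored in A), then the remaining entries to the right of the
     diagonal block (also stored in B). */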
284 
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscBool      roworiented = aij->roworiented;
311 
312   /* Some Variables required in the macro */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscBool      ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash || mat->nooffprocentries) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   mat->insertmode = addv; /* in case this processor had no cache */
470 
471   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
472   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
473   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
479 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
483   PetscErrorCode ierr;
484   PetscMPIInt    n;
485   PetscInt       i,j,rstart,ncols,flg;
486   PetscInt       *row,*col;
487   PetscBool      other_disassembled;
488   PetscScalar    *val;
489   InsertMode     addv = mat->insertmode;
490 
491   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
492   PetscFunctionBegin;
493   if (!aij->donotstash && !mat->nooffprocentries) {
494     while (1) {
495       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
496       if (!flg) break;
497 
498       for (i=0; i<n;) {
499         /* Now identify the consecutive vals belonging to the same row */
500         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
501         if (j < n) ncols = j-i;
502         else       ncols = n-i;
503         /* Now assemble all these values with a single function call */
504         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
505         i = j;
506       }
507     }
508     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
509   }
510   a->compressedrow.use     = PETSC_FALSE;
511   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
512   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
513 
514   /* determine if any processor has disassembled; if so we must
515      also disassemble ourselves, in order that we may reassemble. */
516   /*
517      if nonzero structure of submatrix B cannot change then we know that
518      no processor disassembled thus we can skip this stuff
519   */
520   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
521     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
522     if (mat->was_assembled && !other_disassembled) {
523       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
524     }
525   }
526   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
527     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
528   }
529   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
530   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
531   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
532   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
533 
534   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
535   aij->rowvalues = 0;
536 
537   /* used by MatAXPY() */
538   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
539   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
540 
541   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
542   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
562 {
563   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode ierr;
565   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt       i,*owners = A->rmap->range;
567   PetscInt       *nprocs,j,idx,nsends,row;
568   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt       *rvalues,count,base,slen,*source;
570   PetscInt       *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm       comm = ((PetscObject)A)->comm;
572   MPI_Request    *send_waits,*recv_waits;
573   MPI_Status     recv_status,*send_status;
574 #if defined(PETSC_DEBUG)
575   PetscBool      found = PETSC_FALSE;
576 #endif
577 
578   PetscFunctionBegin;
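  /* Overview: the rows to be zeroed may be owned by other processes, so each process first
     routes the global row indices to their owners (counts, then an exchange of index lists),
     collects the rows it owns into lrows, and only then zeros them locally in l->B and l->A,
     with separate handling when a nonzero diagonal value is requested. */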
579   /*  first count number of contributors to each processor */
580   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
581   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
582   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
583   j = 0;
584   for (i=0; i<N; i++) {
585     if (lastidx > (idx = rows[i])) j = 0;
586     lastidx = idx;
587     for (; j<size; j++) {
588       if (idx >= owners[j] && idx < owners[j+1]) {
589         nprocs[2*j]++;
590         nprocs[2*j+1] = 1;
591         owner[i] = j;
592 #if defined(PETSC_DEBUG)
593         found = PETSC_TRUE;
594 #endif
595         break;
596       }
597     }
598 #if defined(PETSC_DEBUG)
599     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
600     found = PETSC_FALSE;
601 #endif
602   }
603   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
604 
605   if (A->nooffproczerorows) {
606     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
607     nrecvs = nsends;
608     nmax   = N;
609   } else {
610     /* inform other processors of number of messages and max length*/
611     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
612   }
613 
614   /* post receives:   */
615   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
616   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
617   for (i=0; i<nrecvs; i++) {
618     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
619   }
620 
621   /* do sends:
622       1) starts[i] gives the starting index in svalues for stuff going to
623          the ith processor
624   */
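  /* Small worked example (hypothetical counts, for illustration only): with 3 processes and
     per-destination counts nprocs[2*i] = {2,0,3}, the prefix sums give starts = {0,2,2}, so
     the two rows destined for process 0 are packed into svalues[0..1] and the three rows for
     process 2 into svalues[2..4]; owner[i] selects the slot for each row as it is copied. */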
625   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
626   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
627   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
628   starts[0] = 0;
629   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
630   for (i=0; i<N; i++) {
631     svalues[starts[owner[i]]++] = rows[i];
632   }
633 
634   starts[0] = 0;
635   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
636   count = 0;
637   for (i=0; i<size; i++) {
638     if (nprocs[2*i+1]) {
639       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
640     }
641   }
642   ierr = PetscFree(starts);CHKERRQ(ierr);
643 
644   base = owners[rank];
645 
646   /*  wait on receives */
647   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
648   count  = nrecvs; slen = 0;
649   while (count) {
650     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
651     /* unpack receives into our local space */
652     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
653     source[imdex]  = recv_status.MPI_SOURCE;
654     lens[imdex]    = n;
655     slen          += n;
656     count--;
657   }
658   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
659 
660   /* move the data into the send scatter */
661   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
662   count = 0;
663   for (i=0; i<nrecvs; i++) {
664     values = rvalues + i*nmax;
665     for (j=0; j<lens[i]; j++) {
666       lrows[count++] = values[j] - base;
667     }
668   }
669   ierr = PetscFree(rvalues);CHKERRQ(ierr);
670   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
671   ierr = PetscFree(owner);CHKERRQ(ierr);
672   ierr = PetscFree(nprocs);CHKERRQ(ierr);
673 
674   /* actually zap the local rows */
675   /*
676         Zero the required rows. If the "diagonal block" of the matrix
677      is square and the user wishes to set the diagonal we use separate
678      code so that MatSetValues() is not called for each diagonal allocating
679      new memory, thus calling lots of mallocs and slowing things down.
680 
681   */
682   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
683   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
684   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
685     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
686   } else if (diag != 0.0) {
687     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
688     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
689       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
690 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
691     }
692     for (i = 0; i < slen; i++) {
693       row  = lrows[i] + rstart;
694       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
695     }
696     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
697     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
698   } else {
699     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
700   }
701   ierr = PetscFree(lrows);CHKERRQ(ierr);
702 
703   /* wait on sends */
704   if (nsends) {
705     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
706     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
707     ierr = PetscFree(send_status);CHKERRQ(ierr);
708   }
709   ierr = PetscFree(send_waits);CHKERRQ(ierr);
710   ierr = PetscFree(svalues);CHKERRQ(ierr);
711 
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatMult_MPIAIJ"
717 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
718 {
719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
720   PetscErrorCode ierr;
721   PetscInt       nt;
722 
723   PetscFunctionBegin;
724   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
725   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
726   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
727   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
728   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
729   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
730   PetscFunctionReturn(0);
731 }
732 
733 #undef __FUNCT__
734 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
735 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
736 {
737   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
738   PetscErrorCode ierr;
739 
740   PetscFunctionBegin;
741   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatMultAdd_MPIAIJ"
747 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
748 {
749   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
754   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
755   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
756   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
757   PetscFunctionReturn(0);
758 }
759 
760 #undef __FUNCT__
761 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
762 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
763 {
764   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
765   PetscErrorCode ierr;
766   PetscBool      merged;
767 
768   PetscFunctionBegin;
769   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
770   /* do nondiagonal part */
771   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
772   if (!merged) {
773     /* send it on its way */
774     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
775     /* do local part */
776     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
777     /* receive remote parts: note this assumes the values are not actually */
778     /* added in yy until the next line */
779     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
780   } else {
781     /* do local part */
782     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
783     /* send it on its way */
784     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
785     /* values actually were received in the Begin() but we need to call this nop */
786     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
787   }
788   PetscFunctionReturn(0);
789 }
790 
791 EXTERN_C_BEGIN
792 #undef __FUNCT__
793 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
794 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
795 {
796   MPI_Comm       comm;
797   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
798   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
799   IS             Me,Notme;
800   PetscErrorCode ierr;
801   PetscInt       M,N,first,last,*notme,i;
802   PetscMPIInt    size;
803 
804   PetscFunctionBegin;
805 
806   /* Easy test: symmetric diagonal block */
807   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
808   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
809   if (!*f) PetscFunctionReturn(0);
810   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
811   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
812   if (size == 1) PetscFunctionReturn(0);
813 
814   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
815   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
816   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
817   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
818   for (i=0; i<first; i++) notme[i] = i;
819   for (i=last; i<M; i++) notme[i-last+first] = i;
820   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
821   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
822   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
823   Aoff = Aoffs[0];
824   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
825   Boff = Boffs[0];
826   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
827   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
828   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
829   ierr = ISDestroy(Me);CHKERRQ(ierr);
830   ierr = ISDestroy(Notme);CHKERRQ(ierr);
831 
832   PetscFunctionReturn(0);
833 }
834 EXTERN_C_END
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
838 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
839 {
840   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode ierr;
842 
843   PetscFunctionBegin;
844   /* do nondiagonal part */
845   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
846   /* send it on its way */
847   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
848   /* do local part */
849   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
850   /* receive remote parts */
851   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
852   PetscFunctionReturn(0);
853 }
854 
855 /*
856   This only works correctly for square matrices where the subblock A->A is the
857    diagonal block
858 */
859 #undef __FUNCT__
860 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
861 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
862 {
863   PetscErrorCode ierr;
864   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
865 
866   PetscFunctionBegin;
867   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
868   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
869   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatScale_MPIAIJ"
875 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
876 {
877   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
878   PetscErrorCode ierr;
879 
880   PetscFunctionBegin;
881   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
882   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatDestroy_MPIAIJ"
888 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
889 {
890   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode ierr;
892 
893   PetscFunctionBegin;
894 #if defined(PETSC_USE_LOG)
895   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
896 #endif
897   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
898   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
899   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
900   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
901 #if defined (PETSC_USE_CTABLE)
902   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
903 #else
904   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
905 #endif
906   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
907   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
908   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
909   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
910   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
911   ierr = PetscFree(aij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
921   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatView_MPIAIJ_Binary"
927 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
928 {
929   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
930   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
931   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
932   PetscErrorCode    ierr;
933   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
934   int               fd;
935   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
936   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
937   PetscScalar       *column_values;
938   PetscInt          message_count,flowcontrolcount;
939 
940   PetscFunctionBegin;
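  /* File layout written below: a 4-entry header (MAT_FILE_CLASSID, global rows, global
     columns, total number of nonzeros), then every row length, then every column index,
     then every numerical value; each section is gathered to process 0 under flow control
     before being written to the binary file. */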
941   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
942   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
943   nz   = A->nz + B->nz;
944   if (!rank) {
945     header[0] = MAT_FILE_CLASSID;
946     header[1] = mat->rmap->N;
947     header[2] = mat->cmap->N;
948     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
949     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
950     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
951     /* get largest number of rows any processor has */
952     rlen = mat->rmap->n;
953     range = mat->rmap->range;
954     for (i=1; i<size; i++) {
955       rlen = PetscMax(rlen,range[i+1] - range[i]);
956     }
957   } else {
958     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
959     rlen = mat->rmap->n;
960   }
961 
962   /* load up the local row counts */
963   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
964   for (i=0; i<mat->rmap->n; i++) {
965     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
966   }
967 
968   /* store the row lengths to the file */
969   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
970   if (!rank) {
971     MPI_Status status;
972     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
973     for (i=1; i<size; i++) {
974       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
975       rlen = range[i+1] - range[i];
976       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
977       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
978     }
979     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
980   } else {
981     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
982     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
983     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
984   }
985   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
986 
987   /* load up the local column indices */
988   nzmax = nz; /* process 0 needs as much space as the largest amount needed by any process */
989   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
990   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
991   cnt  = 0;
992   for (i=0; i<mat->rmap->n; i++) {
993     for (j=B->i[i]; j<B->i[i+1]; j++) {
994       if ( (col = garray[B->j[j]]) > cstart) break;
995       column_indices[cnt++] = col;
996     }
997     for (k=A->i[i]; k<A->i[i+1]; k++) {
998       column_indices[cnt++] = A->j[k] + cstart;
999     }
1000     for (; j<B->i[i+1]; j++) {
1001       column_indices[cnt++] = garray[B->j[j]];
1002     }
1003   }
1004   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1005 
1006   /* store the column indices to the file */
1007    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1008   if (!rank) {
1009     MPI_Status status;
1010     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1011     for (i=1; i<size; i++) {
1012       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1013       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1014       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1015       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1016       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1017     }
1018      ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1019   } else {
1020     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1021     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1022     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1023     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1024   }
1025   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1026 
1027   /* load up the local column values */
1028   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1029   cnt  = 0;
1030   for (i=0; i<mat->rmap->n; i++) {
1031     for (j=B->i[i]; j<B->i[i+1]; j++) {
1032       if ( garray[B->j[j]] > cstart) break;
1033       column_values[cnt++] = B->a[j];
1034     }
1035     for (k=A->i[i]; k<A->i[i+1]; k++) {
1036       column_values[cnt++] = A->a[k];
1037     }
1038     for (; j<B->i[i+1]; j++) {
1039       column_values[cnt++] = B->a[j];
1040     }
1041   }
1042   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1043 
1044   /* store the column values to the file */
1045    ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1046   if (!rank) {
1047     MPI_Status status;
1048     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1049     for (i=1; i<size; i++) {
1050        ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1051       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1052       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1053       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1054       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1055     }
1056     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1057   } else {
1058     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1059     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1060     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1061     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1062   }
1063   ierr = PetscFree(column_values);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1069 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1070 {
1071   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1072   PetscErrorCode    ierr;
1073   PetscMPIInt       rank = aij->rank,size = aij->size;
1074   PetscBool         isdraw,iascii,isbinary;
1075   PetscViewer       sviewer;
1076   PetscViewerFormat format;
1077 
1078   PetscFunctionBegin;
1079   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1080   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1081   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1082   if (iascii) {
1083     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1084     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1085       MatInfo    info;
1086       PetscBool  inodes;
1087 
1088       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1089       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1090       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1091       if (!inodes) {
1092         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1093 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1094       } else {
1095         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1096 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1097       }
1098       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1099       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1100       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1101       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1102       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1103       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1104       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1105       PetscFunctionReturn(0);
1106     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1107       PetscInt   inodecount,inodelimit,*inodes;
1108       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1109       if (inodes) {
1110         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1111       } else {
1112         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1113       }
1114       PetscFunctionReturn(0);
1115     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1116       PetscFunctionReturn(0);
1117     }
1118   } else if (isbinary) {
1119     if (size == 1) {
1120       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1121       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1122     } else {
1123       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1124     }
1125     PetscFunctionReturn(0);
1126   } else if (isdraw) {
1127     PetscDraw  draw;
1128     PetscBool  isnull;
1129     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1130     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1131   }
1132 
1133   if (size == 1) {
1134     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1135     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1136   } else {
1137     /* assemble the entire matrix onto first processor. */
1138     Mat         A;
1139     Mat_SeqAIJ  *Aloc;
1140     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1141     MatScalar   *a;
1142 
1143     if (mat->rmap->N > 1024) {
1144       PetscBool  flg = PETSC_FALSE;
1145 
1146       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1147       if (!flg) {
1148         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1149       }
1150     }
1151 
1152     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1153     if (!rank) {
1154       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1155     } else {
1156       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1157     }
1158     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1159     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1160     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1161     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1162 
1163     /* copy over the A part */
1164     Aloc = (Mat_SeqAIJ*)aij->A->data;
1165     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1166     row = mat->rmap->rstart;
1167     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart ;}
1168     for (i=0; i<m; i++) {
1169       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1170       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1171     }
1172     aj = Aloc->j;
1173     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1174 
1175     /* copy over the B part */
1176     Aloc = (Mat_SeqAIJ*)aij->B->data;
1177     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1178     row  = mat->rmap->rstart;
1179     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1180     ct   = cols;
1181     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1182     for (i=0; i<m; i++) {
1183       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1184       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1185     }
1186     ierr = PetscFree(ct);CHKERRQ(ierr);
1187     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1188     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1189     /*
1190        Everyone has to call to draw the matrix since the graphics waits are
1191        synchronized across all processors that share the PetscDraw object
1192     */
1193     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1194     if (!rank) {
1195       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1196       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1197       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1198       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1199     }
1200     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1201     ierr = MatDestroy(A);CHKERRQ(ierr);
1202   }
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 #undef __FUNCT__
1207 #define __FUNCT__ "MatView_MPIAIJ"
1208 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1209 {
1210   PetscErrorCode ierr;
1211   PetscBool      iascii,isdraw,issocket,isbinary;
1212 
1213   PetscFunctionBegin;
1214   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1215   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1216   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1217   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1218   if (iascii || isdraw || isbinary || issocket) {
1219     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1220   } else {
1221     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1222   }
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 #undef __FUNCT__
1227 #define __FUNCT__ "MatSOR_MPIAIJ"
1228 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1229 {
1230   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1231   PetscErrorCode ierr;
1232   Vec            bb1 = 0;
1233   PetscBool      hasop;
1234 
1235   PetscFunctionBegin;
1236   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1237     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1238   }
1239 
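  /* The "local" variants below are processor-block relaxations: each process repeatedly
     gathers the off-process part of xx into lvec through Mvctx, forms bb1 = bb - B*lvec
     (B holds the off-processor coupling), and then applies the requested SOR sweep with
     its diagonal block A to bb1. */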
1240   if (flag == SOR_APPLY_UPPER) {
1241     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1242     PetscFunctionReturn(0);
1243   }
1244 
1245   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1246     if (flag & SOR_ZERO_INITIAL_GUESS) {
1247       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1248       its--;
1249     }
1250 
1251     while (its--) {
1252       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1253       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1254 
1255       /* update rhs: bb1 = bb - B*x */
1256       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1257       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1258 
1259       /* local sweep */
1260       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1261     }
1262   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1263     if (flag & SOR_ZERO_INITIAL_GUESS) {
1264       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1265       its--;
1266     }
1267     while (its--) {
1268       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1270 
1271       /* update rhs: bb1 = bb - B*x */
1272       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1273       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1274 
1275       /* local sweep */
1276       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1277     }
1278   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1279     if (flag & SOR_ZERO_INITIAL_GUESS) {
1280       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1281       its--;
1282     }
1283     while (its--) {
1284       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1285       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1286 
1287       /* update rhs: bb1 = bb - B*x */
1288       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1289       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1290 
1291       /* local sweep */
1292       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1293     }
1294   }  else if (flag & SOR_EISENSTAT) {
1295     Vec         xx1;
1296 
1297     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1298     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1299 
1300     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1301     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1302     if (!mat->diag) {
1303       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1304       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1305     }
1306     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1307     if (hasop) {
1308       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1309     } else {
1310       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1311     }
1312     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1313 
1314     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1315 
1316     /* local sweep */
1317     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1318     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1319     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1320   } else {
1321     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1322   }
1323 
1324   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1325   PetscFunctionReturn(0);
1326 }
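
/*
   A hedged sketch of driving the routine above from user code; A, b and x are
   placeholders for an assembled MPIAIJ matrix and conforming vectors.  Only the
   processor-local sweeps are supported: each outer iteration scatters x into
   lvec and applies the local SOR to the updated right-hand side bb1 = bb - B*x.

       one symmetric local (block Jacobi style) sweep with a zero initial guess:
       ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/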
1327 
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatPermute_MPIAIJ"
1330 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1331 {
1332   MPI_Comm       comm,pcomm;
1333   PetscInt       first,local_size,nrows;
1334   const PetscInt *rows;
1335   PetscMPIInt    size;
1336   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1337   PetscErrorCode ierr;
1338 
1339   PetscFunctionBegin;
1340   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1341   /* make a collective version of 'rowp' */
1342   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1343   if (pcomm==comm) {
1344     crowp = rowp;
1345   } else {
1346     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1347     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1348     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1349     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1350   }
1351   /* collect the global row permutation and invert it */
1352   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1353   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1354   if (pcomm!=comm) {
1355     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1356   }
1357   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1358   /* get the local target indices */
1359   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1360   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1361   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1362   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1363   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1364   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1365   /* the column permutation is so much easier;
1366      make a local version of 'colp' and invert it */
1367   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1368   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1369   if (size==1) {
1370     lcolp = colp;
1371   } else {
1372     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1373     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1374     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1375   }
1376   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1377   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1378   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1379   if (size>1) {
1380     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1381     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1382   }
1383   /* now we just get the submatrix */
1384   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1385   /* clean up */
1386   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1387   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1388   PetscFunctionReturn(0);
1389 }
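
/*
   A minimal sketch of calling the permutation above; rowp carries the permuted
   global row numbers of the locally owned rows, and colp a global column
   permutation known on every process (here both are identities; A, rowp, colp
   and B are placeholders).

       IS       rowp,colp;
       Mat      B;
       PetscInt rstart,rend,N;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
       ierr = ISCreateStride(((PetscObject)A)->comm,rend-rstart,rstart,1,&rowp);CHKERRQ(ierr);
       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&colp);CHKERRQ(ierr);
       ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/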
1390 
1391 #undef __FUNCT__
1392 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1393 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1394 {
1395   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1396   Mat            A = mat->A,B = mat->B;
1397   PetscErrorCode ierr;
1398   PetscReal      isend[5],irecv[5];
1399 
1400   PetscFunctionBegin;
1401   info->block_size     = 1.0;
1402   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1403   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1404   isend[3] = info->memory;  isend[4] = info->mallocs;
1405   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1406   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1407   isend[3] += info->memory;  isend[4] += info->mallocs;
1408   if (flag == MAT_LOCAL) {
1409     info->nz_used      = isend[0];
1410     info->nz_allocated = isend[1];
1411     info->nz_unneeded  = isend[2];
1412     info->memory       = isend[3];
1413     info->mallocs      = isend[4];
1414   } else if (flag == MAT_GLOBAL_MAX) {
1415     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1416     info->nz_used      = irecv[0];
1417     info->nz_allocated = irecv[1];
1418     info->nz_unneeded  = irecv[2];
1419     info->memory       = irecv[3];
1420     info->mallocs      = irecv[4];
1421   } else if (flag == MAT_GLOBAL_SUM) {
1422     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1423     info->nz_used      = irecv[0];
1424     info->nz_allocated = irecv[1];
1425     info->nz_unneeded  = irecv[2];
1426     info->memory       = irecv[3];
1427     info->mallocs      = irecv[4];
1428   }
1429   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1430   info->fill_ratio_needed = 0;
1431   info->factor_mallocs    = 0;
1432 
1433   PetscFunctionReturn(0);
1434 }
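
/*
   A short usage sketch, assuming an assembled matrix A (placeholder name).  The
   flag selects per-process numbers (MAT_LOCAL) or a max/sum reduction over the
   communicator, as implemented above.

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(((PetscObject)A)->comm,"nz used %g, nz allocated %g, mallocs %g\n",
                          info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/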
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatSetOption_MPIAIJ"
1438 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1439 {
1440   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1441   PetscErrorCode ierr;
1442 
1443   PetscFunctionBegin;
1444   switch (op) {
1445   case MAT_NEW_NONZERO_LOCATIONS:
1446   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1447   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1448   case MAT_KEEP_NONZERO_PATTERN:
1449   case MAT_NEW_NONZERO_LOCATION_ERR:
1450   case MAT_USE_INODES:
1451   case MAT_IGNORE_ZERO_ENTRIES:
1452     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1453     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1454     break;
1455   case MAT_ROW_ORIENTED:
1456     a->roworiented = flg;
1457     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1458     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1459     break;
1460   case MAT_NEW_DIAGONALS:
1461     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1462     break;
1463   case MAT_IGNORE_OFF_PROC_ENTRIES:
1464     a->donotstash = PETSC_TRUE;
1465     break;
1466   case MAT_SPD:
1467     A->spd_set                         = PETSC_TRUE;
1468     A->spd                             = flg;
1469     if (flg) {
1470       A->symmetric                     = PETSC_TRUE;
1471       A->structurally_symmetric        = PETSC_TRUE;
1472       A->symmetric_set                 = PETSC_TRUE;
1473       A->structurally_symmetric_set    = PETSC_TRUE;
1474     }
1475     break;
1476   case MAT_SYMMETRIC:
1477     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1478     break;
1479   case MAT_STRUCTURALLY_SYMMETRIC:
1480     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1481     break;
1482   case MAT_HERMITIAN:
1483     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1484     break;
1485   case MAT_SYMMETRY_ETERNAL:
1486     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1487     break;
1488   default:
1489     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1490   }
1491   PetscFunctionReturn(0);
1492 }
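
/*
   A brief usage sketch (A is a placeholder for an MPIAIJ matrix).  Options that
   affect the storage are forwarded to both the diagonal (a->A) and off-diagonal
   (a->B) blocks, as shown above.

       ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
       ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
*/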
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatGetRow_MPIAIJ"
1496 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1497 {
1498   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1499   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1500   PetscErrorCode ierr;
1501   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1502   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1503   PetscInt       *cmap,*idx_p;
1504 
1505   PetscFunctionBegin;
1506   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1507   mat->getrowactive = PETSC_TRUE;
1508 
1509   if (!mat->rowvalues && (idx || v)) {
1510     /*
1511         allocate enough space to hold information from the longest row.
1512     */
1513     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1514     PetscInt   max = 1,tmp;
1515     for (i=0; i<matin->rmap->n; i++) {
1516       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1517       if (max < tmp) { max = tmp; }
1518     }
1519     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1520   }
1521 
1522   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1523   lrow = row - rstart;
1524 
1525   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1526   if (!v)   {pvA = 0; pvB = 0;}
1527   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1528   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1529   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1530   nztot = nzA + nzB;
1531 
1532   cmap  = mat->garray;
1533   if (v  || idx) {
1534     if (nztot) {
1535       /* Sort by increasing column numbers, assuming A and B already sorted */
1536       PetscInt imark = -1;
1537       if (v) {
1538         *v = v_p = mat->rowvalues;
1539         for (i=0; i<nzB; i++) {
1540           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1541           else break;
1542         }
1543         imark = i;
1544         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1545         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1546       }
1547       if (idx) {
1548         *idx = idx_p = mat->rowindices;
1549         if (imark > -1) {
1550           for (i=0; i<imark; i++) {
1551             idx_p[i] = cmap[cworkB[i]];
1552           }
1553         } else {
1554           for (i=0; i<nzB; i++) {
1555             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1556             else break;
1557           }
1558           imark = i;
1559         }
1560         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1561         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1562       }
1563     } else {
1564       if (idx) *idx = 0;
1565       if (v)   *v   = 0;
1566     }
1567   }
1568   *nz = nztot;
1569   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1570   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1576 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1577 {
1578   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1579 
1580   PetscFunctionBegin;
1581   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1582   aij->getrowactive = PETSC_FALSE;
1583   PetscFunctionReturn(0);
1584 }
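
/*
   A minimal sketch of the get/restore pairing required by the two routines
   above; only locally owned rows may be requested.  A is a placeholder for an
   assembled MPIAIJ matrix.

       PetscInt          row,ncols,rstart,rend;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         work with ncols, cols (global column indices) and vals here
         ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
*/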
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatNorm_MPIAIJ"
1588 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1589 {
1590   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1591   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1592   PetscErrorCode ierr;
1593   PetscInt       i,j,cstart = mat->cmap->rstart;
1594   PetscReal      sum = 0.0;
1595   MatScalar      *v;
1596 
1597   PetscFunctionBegin;
1598   if (aij->size == 1) {
1599     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1600   } else {
1601     if (type == NORM_FROBENIUS) {
1602       v = amat->a;
1603       for (i=0; i<amat->nz; i++) {
1604 #if defined(PETSC_USE_COMPLEX)
1605         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1606 #else
1607         sum += (*v)*(*v); v++;
1608 #endif
1609       }
1610       v = bmat->a;
1611       for (i=0; i<bmat->nz; i++) {
1612 #if defined(PETSC_USE_COMPLEX)
1613         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1614 #else
1615         sum += (*v)*(*v); v++;
1616 #endif
1617       }
1618       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1619       *norm = sqrt(*norm);
1620     } else if (type == NORM_1) { /* max column norm */
1621       PetscReal *tmp,*tmp2;
1622       PetscInt  *jj,*garray = aij->garray;
1623       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1624       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1625       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1626       *norm = 0.0;
1627       v = amat->a; jj = amat->j;
1628       for (j=0; j<amat->nz; j++) {
1629         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1630       }
1631       v = bmat->a; jj = bmat->j;
1632       for (j=0; j<bmat->nz; j++) {
1633         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1634       }
1635       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1636       for (j=0; j<mat->cmap->N; j++) {
1637         if (tmp2[j] > *norm) *norm = tmp2[j];
1638       }
1639       ierr = PetscFree(tmp);CHKERRQ(ierr);
1640       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1641     } else if (type == NORM_INFINITY) { /* max row norm */
1642       PetscReal ntemp = 0.0;
1643       for (j=0; j<aij->A->rmap->n; j++) {
1644         v = amat->a + amat->i[j];
1645         sum = 0.0;
1646         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1647           sum += PetscAbsScalar(*v); v++;
1648         }
1649         v = bmat->a + bmat->i[j];
1650         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1651           sum += PetscAbsScalar(*v); v++;
1652         }
1653         if (sum > ntemp) ntemp = sum;
1654       }
1655       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1656     } else {
1657       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1658     }
1659   }
1660   PetscFunctionReturn(0);
1661 }
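
/*
   A short usage sketch (A is a placeholder).  NORM_FROBENIUS, NORM_1 and
   NORM_INFINITY are supported by the routine above; the two norm is not.

       PetscReal nrm;
       ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/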
1662 
1663 #undef __FUNCT__
1664 #define __FUNCT__ "MatTranspose_MPIAIJ"
1665 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1666 {
1667   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1668   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1669   PetscErrorCode ierr;
1670   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1671   PetscInt       cstart=A->cmap->rstart,ncol;
1672   Mat            B;
1673   MatScalar      *array;
1674 
1675   PetscFunctionBegin;
1676   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1677 
1678   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1679   ai = Aloc->i; aj = Aloc->j;
1680   bi = Bloc->i; bj = Bloc->j;
1681   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1682     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1683     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1684     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1685     for (i=0; i<ai[ma]; i++){
1686       d_nnz[aj[i]] ++;
1687       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1688     }
1689 
1690     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1691     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1692     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1693     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1694     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1695   } else {
1696     B = *matout;
1697   }
1698 
1699   /* copy over the A part */
1700   array = Aloc->a;
1701   row = A->rmap->rstart;
1702   for (i=0; i<ma; i++) {
1703     ncol = ai[i+1]-ai[i];
1704     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1705     row++; array += ncol; aj += ncol;
1706   }
1707   aj = Aloc->j;
1708   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1709 
1710   /* copy over the B part */
1711   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1712   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1713   array = Bloc->a;
1714   row = A->rmap->rstart;
1715   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1716   cols_tmp = cols;
1717   for (i=0; i<mb; i++) {
1718     ncol = bi[i+1]-bi[i];
1719     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1720     row++; array += ncol; cols_tmp += ncol;
1721   }
1722   ierr = PetscFree(cols);CHKERRQ(ierr);
1723 
1724   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1725   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1726   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1727     *matout = B;
1728   } else {
1729     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1730   }
1731   PetscFunctionReturn(0);
1732 }
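
/*
   A minimal sketch of the two calling modes handled above (A and At are
   placeholders).  In-place reuse is restricted to square matrices.

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   new matrix
       ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      in place, square matrices only
*/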
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1736 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1737 {
1738   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1739   Mat            a = aij->A,b = aij->B;
1740   PetscErrorCode ierr;
1741   PetscInt       s1,s2,s3;
1742 
1743   PetscFunctionBegin;
1744   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1745   if (rr) {
1746     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1747     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1748     /* Overlap communication with computation. */
1749     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1750   }
1751   if (ll) {
1752     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1753     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1754     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1755   }
1756   /* scale the diagonal block */
1757   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1758 
1759   if (rr) {
1760     /* Do a scatter end and then right scale the off-diagonal block */
1761     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1762     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1763   }
1764 
1765   PetscFunctionReturn(0);
1766 }
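
/*
   A short usage sketch (A, l, r are placeholders): l must conform to the row
   layout and r to the column layout of A, matching the size checks above.

       Vec l,r;
       ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
       fill l and r with the desired scalings, then
       ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/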
1767 
1768 #undef __FUNCT__
1769 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1770 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1771 {
1772   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1773   PetscErrorCode ierr;
1774 
1775   PetscFunctionBegin;
1776   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1777   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1778   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1779   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1780   PetscFunctionReturn(0);
1781 }
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1784 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1785 {
1786   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1787   PetscErrorCode ierr;
1788 
1789   PetscFunctionBegin;
1790   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 
1794 #undef __FUNCT__
1795 #define __FUNCT__ "MatEqual_MPIAIJ"
1796 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
1797 {
1798   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1799   Mat            a,b,c,d;
1800   PetscBool      flg;
1801   PetscErrorCode ierr;
1802 
1803   PetscFunctionBegin;
1804   a = matA->A; b = matA->B;
1805   c = matB->A; d = matB->B;
1806 
1807   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1808   if (flg) {
1809     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1810   }
1811   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1812   PetscFunctionReturn(0);
1813 }
1814 
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatCopy_MPIAIJ"
1817 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1818 {
1819   PetscErrorCode ierr;
1820   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1821   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1822 
1823   PetscFunctionBegin;
1824   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1825   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1826     /* Because of the column compression in the off-process part of the matrix a->B,
1827        the number of columns in a->B and b->B may differ, hence we cannot call
1828        MatCopy() directly on the two parts. If need be, a copy more efficient than
1829        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1830        then copying the submatrices */
1831     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1832   } else {
1833     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1834     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1835   }
1836   PetscFunctionReturn(0);
1837 }
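
/*
   A brief usage sketch (A and B are placeholders).  With SAME_NONZERO_PATTERN
   and matching copy implementations the two blocks are copied directly;
   otherwise the routine above falls back to MatCopy_Basic().

       Mat B;
       ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
       ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/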
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1841 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1842 {
1843   PetscErrorCode ierr;
1844 
1845   PetscFunctionBegin;
1846   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatAXPY_MPIAIJ"
1852 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1853 {
1854   PetscErrorCode ierr;
1855   PetscInt       i;
1856   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1857   PetscBLASInt   bnz,one=1;
1858   Mat_SeqAIJ     *x,*y;
1859 
1860   PetscFunctionBegin;
1861   if (str == SAME_NONZERO_PATTERN) {
1862     PetscScalar alpha = a;
1863     x = (Mat_SeqAIJ *)xx->A->data;
1864     y = (Mat_SeqAIJ *)yy->A->data;
1865     bnz = PetscBLASIntCast(x->nz);
1866     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1867     x = (Mat_SeqAIJ *)xx->B->data;
1868     y = (Mat_SeqAIJ *)yy->B->data;
1869     bnz = PetscBLASIntCast(x->nz);
1870     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1871   } else if (str == SUBSET_NONZERO_PATTERN) {
1872     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1873 
1874     x = (Mat_SeqAIJ *)xx->B->data;
1875     y = (Mat_SeqAIJ *)yy->B->data;
1876     if (y->xtoy && y->XtoY != xx->B) {
1877       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1878       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1879     }
1880     if (!y->xtoy) { /* get xtoy */
1881       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1882       y->XtoY = xx->B;
1883       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1884     }
1885     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1886   } else {
1887     Mat B;
1888     PetscInt *nnz_d,*nnz_o;
1889     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
1890     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
1891     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
1892     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1893     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1894     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
1895     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1896     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
1897     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
1898     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1899     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1900     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1901     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1902   }
1903   PetscFunctionReturn(0);
1904 }
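
/*
   A minimal sketch of Y = Y + alpha*X for MPIAIJ matrices (Y, X and alpha are
   placeholders).  The structure flag selects the fast BLAS path
   (SAME_NONZERO_PATTERN), the mapped path (SUBSET_NONZERO_PATTERN), or the
   general reallocation path handled above.

       ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/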
1905 
1906 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatConjugate_MPIAIJ"
1910 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1911 {
1912 #if defined(PETSC_USE_COMPLEX)
1913   PetscErrorCode ierr;
1914   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1915 
1916   PetscFunctionBegin;
1917   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1918   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1919 #else
1920   PetscFunctionBegin;
1921 #endif
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 #undef __FUNCT__
1926 #define __FUNCT__ "MatRealPart_MPIAIJ"
1927 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1928 {
1929   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1930   PetscErrorCode ierr;
1931 
1932   PetscFunctionBegin;
1933   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1934   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 #undef __FUNCT__
1939 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1940 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1941 {
1942   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1943   PetscErrorCode ierr;
1944 
1945   PetscFunctionBegin;
1946   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1947   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #ifdef PETSC_HAVE_PBGL
1952 
1953 #include <boost/parallel/mpi/bsp_process_group.hpp>
1954 #include <boost/graph/distributed/ilu_default_graph.hpp>
1955 #include <boost/graph/distributed/ilu_0_block.hpp>
1956 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1957 #include <boost/graph/distributed/petsc/interface.hpp>
1958 #include <boost/multi_array.hpp>
1959 #include <boost/parallel/distributed_property_map.hpp>
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1963 /*
1964   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1965 */
1966 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1967 {
1968   namespace petsc = boost::distributed::petsc;
1969 
1970   namespace graph_dist = boost::graph::distributed;
1971   using boost::graph::distributed::ilu_default::process_group_type;
1972   using boost::graph::ilu_permuted;
1973 
1974   PetscBool       row_identity, col_identity;
1975   PetscContainer  c;
1976   PetscInt        m, n, M, N;
1977   PetscErrorCode  ierr;
1978 
1979   PetscFunctionBegin;
1980   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1981   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1982   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1983   if (!row_identity || !col_identity) {
1984     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1985   }
1986 
1987   process_group_type pg;
1988   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1989   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1990   lgraph_type&   level_graph = *lgraph_p;
1991   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1992 
1993   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1994   ilu_permuted(level_graph);
1995 
1996   /* put together the new matrix */
1997   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1998   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1999   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2000   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2001   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2002   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2003   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2004 
2005   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2006   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2007   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 #undef __FUNCT__
2012 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2013 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2014 {
2015   PetscFunctionBegin;
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 #undef __FUNCT__
2020 #define __FUNCT__ "MatSolve_MPIAIJ"
2021 /*
2022   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2023 */
2024 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2025 {
2026   namespace graph_dist = boost::graph::distributed;
2027 
2028   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2029   lgraph_type*   lgraph_p;
2030   PetscContainer c;
2031   PetscErrorCode ierr;
2032 
2033   PetscFunctionBegin;
2034   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2035   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2036   ierr = VecCopy(b, x);CHKERRQ(ierr);
2037 
2038   PetscScalar* array_x;
2039   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2040   PetscInt sx;
2041   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2042 
2043   PetscScalar* array_b;
2044   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2045   PetscInt sb;
2046   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2047 
2048   lgraph_type&   level_graph = *lgraph_p;
2049   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2050 
2051   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2052   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2053                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2054 
2055   typedef boost::iterator_property_map<array_ref_type::iterator,
2056                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2057   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2058                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2059 
2060   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2061 
2062   PetscFunctionReturn(0);
2063 }
2064 #endif
2065 
2066 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2067   PetscInt       nzlocal,nsends,nrecvs;
2068   PetscMPIInt    *send_rank,*recv_rank;
2069   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2070   PetscScalar    *sbuf_a,**rbuf_a;
2071   PetscErrorCode (*MatDestroy)(Mat);
2072 } Mat_Redundant;
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2076 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2077 {
2078   PetscErrorCode       ierr;
2079   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2080   PetscInt             i;
2081 
2082   PetscFunctionBegin;
2083   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2084   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2085   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2086   for (i=0; i<redund->nrecvs; i++){
2087     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2088     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2089   }
2090   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2091   ierr = PetscFree(redund);CHKERRQ(ierr);
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatDestroy_MatRedundant"
2097 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2098 {
2099   PetscErrorCode  ierr;
2100   PetscContainer  container;
2101   Mat_Redundant   *redund=PETSC_NULL;
2102 
2103   PetscFunctionBegin;
2104   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2105   if (container) {
2106     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2107   } else {
2108     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2109   }
2110   A->ops->destroy = redund->MatDestroy;
2111   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2112   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2113   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 #undef __FUNCT__
2118 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2119 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2120 {
2121   PetscMPIInt    rank,size;
2122   MPI_Comm       comm=((PetscObject)mat)->comm;
2123   PetscErrorCode ierr;
2124   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2125   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2126   PetscInt       *rowrange=mat->rmap->range;
2127   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2128   Mat            A=aij->A,B=aij->B,C=*matredundant;
2129   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2130   PetscScalar    *sbuf_a;
2131   PetscInt       nzlocal=a->nz+b->nz;
2132   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2133   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2134   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2135   MatScalar      *aworkA,*aworkB;
2136   PetscScalar    *vals;
2137   PetscMPIInt    tag1,tag2,tag3,imdex;
2138   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2139                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2140   MPI_Status     recv_status,*send_status;
2141   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2142   PetscInt       **rbuf_j=PETSC_NULL;
2143   PetscScalar    **rbuf_a=PETSC_NULL;
2144   Mat_Redundant  *redund=PETSC_NULL;
2145   PetscContainer container;
2146 
2147   PetscFunctionBegin;
2148   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2149   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2150 
2151   if (reuse == MAT_REUSE_MATRIX) {
2152     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2153     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2154     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2155     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2156     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2157     if (container) {
2158       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2159     } else {
2160       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2161     }
2162     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2163 
2164     nsends    = redund->nsends;
2165     nrecvs    = redund->nrecvs;
2166     send_rank = redund->send_rank;
2167     recv_rank = redund->recv_rank;
2168     sbuf_nz   = redund->sbuf_nz;
2169     rbuf_nz   = redund->rbuf_nz;
2170     sbuf_j    = redund->sbuf_j;
2171     sbuf_a    = redund->sbuf_a;
2172     rbuf_j    = redund->rbuf_j;
2173     rbuf_a    = redund->rbuf_a;
2174   }
2175 
2176   if (reuse == MAT_INITIAL_MATRIX){
2177     PetscMPIInt  subrank,subsize;
2178     PetscInt     nleftover,np_subcomm;
2179     /* get the destination processors' id send_rank, nsends and nrecvs */
2180     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2181     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2182     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2183     np_subcomm = size/nsubcomm;
2184     nleftover  = size - nsubcomm*np_subcomm;
2185     nsends = 0; nrecvs = 0;
2186     for (i=0; i<size; i++){ /* i=rank*/
2187       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2188         send_rank[nsends] = i; nsends++;
2189         recv_rank[nrecvs++] = i;
2190       }
2191     }
2192     if (rank >= size - nleftover){/* this proc is a leftover processor */
2193       i = size-nleftover-1;
2194       j = 0;
2195       while (j < nsubcomm - nleftover){
2196         send_rank[nsends++] = i;
2197         i--; j++;
2198       }
2199     }
2200 
2201     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2202       for (i=0; i<nleftover; i++){
2203         recv_rank[nrecvs++] = size-nleftover+i;
2204       }
2205     }
2206 
2207     /* allocate sbuf_j, sbuf_a */
2208     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2209     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2210     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2211   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2212 
2213   /* copy mat's local entries into the buffers */
2214   if (reuse == MAT_INITIAL_MATRIX){
2215     rownz_max = 0;
2216     rptr = sbuf_j;
2217     cols = sbuf_j + rend-rstart + 1;
2218     vals = sbuf_a;
2219     rptr[0] = 0;
2220     for (i=0; i<rend-rstart; i++){
2221       row = i + rstart;
2222       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2223       ncols  = nzA + nzB;
2224       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2225       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2226       /* load the column indices for this row into cols */
2227       lwrite = 0;
2228       for (l=0; l<nzB; l++) {
2229         if ((ctmp = bmap[cworkB[l]]) < cstart){
2230           vals[lwrite]   = aworkB[l];
2231           cols[lwrite++] = ctmp;
2232         }
2233       }
2234       for (l=0; l<nzA; l++){
2235         vals[lwrite]   = aworkA[l];
2236         cols[lwrite++] = cstart + cworkA[l];
2237       }
2238       for (l=0; l<nzB; l++) {
2239         if ((ctmp = bmap[cworkB[l]]) >= cend){
2240           vals[lwrite]   = aworkB[l];
2241           cols[lwrite++] = ctmp;
2242         }
2243       }
2244       vals += ncols;
2245       cols += ncols;
2246       rptr[i+1] = rptr[i] + ncols;
2247       if (rownz_max < ncols) rownz_max = ncols;
2248     }
2249     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2250   } else { /* only copy matrix values into sbuf_a */
2251     rptr = sbuf_j;
2252     vals = sbuf_a;
2253     rptr[0] = 0;
2254     for (i=0; i<rend-rstart; i++){
2255       row = i + rstart;
2256       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2257       ncols  = nzA + nzB;
2258       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2259       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2260       lwrite = 0;
2261       for (l=0; l<nzB; l++) {
2262         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2263       }
2264       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2265       for (l=0; l<nzB; l++) {
2266         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2267       }
2268       vals += ncols;
2269       rptr[i+1] = rptr[i] + ncols;
2270     }
2271   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2272 
2273   /* send nzlocal to others, and recv the others' nzlocal */
2274   /*--------------------------------------------------*/
2275   if (reuse == MAT_INITIAL_MATRIX){
2276     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2277     s_waits2 = s_waits3 + nsends;
2278     s_waits1 = s_waits2 + nsends;
2279     r_waits1 = s_waits1 + nsends;
2280     r_waits2 = r_waits1 + nrecvs;
2281     r_waits3 = r_waits2 + nrecvs;
2282   } else {
2283     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2284     r_waits3 = s_waits3 + nsends;
2285   }
2286 
2287   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2288   if (reuse == MAT_INITIAL_MATRIX){
2289     /* get new tags to keep the communication clean */
2290     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2291     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2292     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2293 
2294     /* post receives of other's nzlocal */
2295     for (i=0; i<nrecvs; i++){
2296       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2297     }
2298     /* send nzlocal to others */
2299     for (i=0; i<nsends; i++){
2300       sbuf_nz[i] = nzlocal;
2301       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2302     }
2303     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2304     count = nrecvs;
2305     while (count) {
2306       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2307       recv_rank[imdex] = recv_status.MPI_SOURCE;
2308       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2309       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2310 
2311       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2312       rbuf_nz[imdex] += i + 2;
2313       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2314       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2315       count--;
2316     }
2317     /* wait on sends of nzlocal */
2318     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2319     /* send mat->i,j to others, and recv from the others */
2320     /*------------------------------------------------*/
2321     for (i=0; i<nsends; i++){
2322       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2323       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2324     }
2325     /* wait on receives of mat->i,j */
2326     /*------------------------------*/
2327     count = nrecvs;
2328     while (count) {
2329       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2330       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2331       count--;
2332     }
2333     /* wait on sends of mat->i,j */
2334     /*---------------------------*/
2335     if (nsends) {
2336       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2337     }
2338   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2339 
2340   /* post receives, send and receive mat->a */
2341   /*----------------------------------------*/
2342   for (imdex=0; imdex<nrecvs; imdex++) {
2343     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2344   }
2345   for (i=0; i<nsends; i++){
2346     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2347   }
2348   count = nrecvs;
2349   while (count) {
2350     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2351     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2352     count--;
2353   }
2354   if (nsends) {
2355     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2356   }
2357 
2358   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2359 
2360   /* create redundant matrix */
2361   /*-------------------------*/
2362   if (reuse == MAT_INITIAL_MATRIX){
2363     /* compute rownz_max for preallocation */
2364     for (imdex=0; imdex<nrecvs; imdex++){
2365       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2366       rptr = rbuf_j[imdex];
2367       for (i=0; i<j; i++){
2368         ncols = rptr[i+1] - rptr[i];
2369         if (rownz_max < ncols) rownz_max = ncols;
2370       }
2371     }
2372 
2373     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2374     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2375     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2376     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2377     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2378   } else {
2379     C = *matredundant;
2380   }
2381 
2382   /* insert local matrix entries */
2383   rptr = sbuf_j;
2384   cols = sbuf_j + rend-rstart + 1;
2385   vals = sbuf_a;
2386   for (i=0; i<rend-rstart; i++){
2387     row   = i + rstart;
2388     ncols = rptr[i+1] - rptr[i];
2389     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2390     vals += ncols;
2391     cols += ncols;
2392   }
2393   /* insert received matrix entries */
2394   for (imdex=0; imdex<nrecvs; imdex++){
2395     rstart = rowrange[recv_rank[imdex]];
2396     rend   = rowrange[recv_rank[imdex]+1];
2397     rptr = rbuf_j[imdex];
2398     cols = rbuf_j[imdex] + rend-rstart + 1;
2399     vals = rbuf_a[imdex];
2400     for (i=0; i<rend-rstart; i++){
2401       row   = i + rstart;
2402       ncols = rptr[i+1] - rptr[i];
2403       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2404       vals += ncols;
2405       cols += ncols;
2406     }
2407   }
2408   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2409   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2410   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2411   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2412   if (reuse == MAT_INITIAL_MATRIX){
2413     PetscContainer container;
2414     *matredundant = C;
2415     /* create a supporting struct and attach it to C for reuse */
2416     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2417     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2418     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2419     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2420     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2421 
2422     redund->nzlocal = nzlocal;
2423     redund->nsends  = nsends;
2424     redund->nrecvs  = nrecvs;
2425     redund->send_rank = send_rank;
2426     redund->recv_rank = recv_rank;
2427     redund->sbuf_nz = sbuf_nz;
2428     redund->rbuf_nz = rbuf_nz;
2429     redund->sbuf_j  = sbuf_j;
2430     redund->sbuf_a  = sbuf_a;
2431     redund->rbuf_j  = rbuf_j;
2432     redund->rbuf_a  = rbuf_a;
2433 
2434     redund->MatDestroy = C->ops->destroy;
2435     C->ops->destroy    = MatDestroy_MatRedundant;
2436   }
2437   PetscFunctionReturn(0);
2438 }
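
/*
   A hedged usage sketch (mat, nsubcomm, subcomm, mlocal_sub and C are
   placeholders): each subcommunicator, e.g. obtained from PetscSubcommCreate(),
   ends up with a redundant copy of the whole matrix distributed over its
   processes.

       Mat C;
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
       later, with the same layout, the copy can be refreshed in place:
       ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/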
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2442 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2443 {
2444   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2445   PetscErrorCode ierr;
2446   PetscInt       i,*idxb = 0;
2447   PetscScalar    *va,*vb;
2448   Vec            vtmp;
2449 
2450   PetscFunctionBegin;
2451   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2452   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2453   if (idx) {
2454     for (i=0; i<A->rmap->n; i++) {
2455       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2456     }
2457   }
2458 
2459   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2460   if (idx) {
2461     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2462   }
2463   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2464   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2465 
2466   for (i=0; i<A->rmap->n; i++){
2467     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2468       va[i] = vb[i];
2469       if (idx) idx[i] = a->garray[idxb[i]];
2470     }
2471   }
2472 
2473   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2474   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2475   if (idxb) {
2476     ierr = PetscFree(idxb);CHKERRQ(ierr);
2477   }
2478   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2479   PetscFunctionReturn(0);
2480 }
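
/*
   A short usage sketch (A, rowmax and loc are placeholders).  The vector must
   use the row layout of A; the optional index array receives the global column
   of each row-wise maximum, combining the diagonal and off-diagonal blocks as
   above.

       Vec      rowmax;
       PetscInt *loc,m;
       ierr = MatGetVecs(A,PETSC_NULL,&rowmax);CHKERRQ(ierr);
       ierr = MatGetLocalSize(A,&m,PETSC_NULL);CHKERRQ(ierr);
       ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
       ierr = MatGetRowMaxAbs(A,rowmax,loc);CHKERRQ(ierr);
*/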
2481 
2482 #undef __FUNCT__
2483 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2484 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2485 {
2486   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2487   PetscErrorCode ierr;
2488   PetscInt       i,*idxb = 0;
2489   PetscScalar    *va,*vb;
2490   Vec            vtmp;
2491 
2492   PetscFunctionBegin;
2493   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2494   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2495   if (idx) {
2496     for (i=0; i<A->rmap->n; i++) { /* loop over the locally owned rows, as in MatGetRowMaxAbs_MPIAIJ() */
2497       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2498     }
2499   }
2500 
2501   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2502   if (idx) {
2503     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2504   }
2505   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2506   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2507 
2508   for (i=0; i<A->rmap->n; i++){
2509     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2510       va[i] = vb[i];
2511       if (idx) idx[i] = a->garray[idxb[i]];
2512     }
2513   }
2514 
2515   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2516   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2517   if (idxb) {
2518     ierr = PetscFree(idxb);CHKERRQ(ierr);
2519   }
2520   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2526 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2527 {
2528   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2529   PetscInt       n      = A->rmap->n;
2530   PetscInt       cstart = A->cmap->rstart;
2531   PetscInt      *cmap   = mat->garray;
2532   PetscInt      *diagIdx, *offdiagIdx;
2533   Vec            diagV, offdiagV;
2534   PetscScalar   *a, *diagA, *offdiagA;
2535   PetscInt       r;
2536   PetscErrorCode ierr;
2537 
2538   PetscFunctionBegin;
2539   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2540   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* sequential work vectors, as in MatGetRowMaxAbs_MPIAIJ() */
2541   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2542   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2543   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2544   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2545   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2546   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2547   for(r = 0; r < n; ++r) {
2548     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2549       a[r]   = diagA[r];
2550       idx[r] = cstart + diagIdx[r];
2551     } else {
2552       a[r]   = offdiagA[r];
2553       idx[r] = cmap[offdiagIdx[r]];
2554     }
2555   }
2556   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2557   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2558   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2559   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2560   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2561   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2567 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2568 {
2569   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2570   PetscInt       n      = A->rmap->n;
2571   PetscInt       cstart = A->cmap->rstart;
2572   PetscInt      *cmap   = mat->garray;
2573   PetscInt      *diagIdx, *offdiagIdx;
2574   Vec            diagV, offdiagV;
2575   PetscScalar   *a, *diagA, *offdiagA;
2576   PetscInt       r;
2577   PetscErrorCode ierr;
2578 
2579   PetscFunctionBegin;
2580   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2581   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* sequential work vectors, as in MatGetRowMaxAbs_MPIAIJ() */
2582   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2583   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2584   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2585   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2586   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2587   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2588   for(r = 0; r < n; ++r) {
2589     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2590       a[r]   = diagA[r];
2591       idx[r] = cstart + diagIdx[r];
2592     } else {
2593       a[r]   = offdiagA[r];
2594       idx[r] = cmap[offdiagIdx[r]];
2595     }
2596   }
2597   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2598   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2599   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2600   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2601   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2602   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 #undef __FUNCT__
2607 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2608 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2609 {
2610   PetscErrorCode ierr;
2611   Mat            *dummy;
2612 
2613   PetscFunctionBegin;
2614   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2615   *newmat = *dummy;
2616   ierr = PetscFree(dummy);CHKERRQ(ierr);
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2621 /* -------------------------------------------------------------------*/
2622 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2623        MatGetRow_MPIAIJ,
2624        MatRestoreRow_MPIAIJ,
2625        MatMult_MPIAIJ,
2626 /* 4*/ MatMultAdd_MPIAIJ,
2627        MatMultTranspose_MPIAIJ,
2628        MatMultTransposeAdd_MPIAIJ,
2629 #ifdef PETSC_HAVE_PBGL
2630        MatSolve_MPIAIJ,
2631 #else
2632        0,
2633 #endif
2634        0,
2635        0,
2636 /*10*/ 0,
2637        0,
2638        0,
2639        MatSOR_MPIAIJ,
2640        MatTranspose_MPIAIJ,
2641 /*15*/ MatGetInfo_MPIAIJ,
2642        MatEqual_MPIAIJ,
2643        MatGetDiagonal_MPIAIJ,
2644        MatDiagonalScale_MPIAIJ,
2645        MatNorm_MPIAIJ,
2646 /*20*/ MatAssemblyBegin_MPIAIJ,
2647        MatAssemblyEnd_MPIAIJ,
2648        MatSetOption_MPIAIJ,
2649        MatZeroEntries_MPIAIJ,
2650 /*24*/ MatZeroRows_MPIAIJ,
2651        0,
2652 #ifdef PETSC_HAVE_PBGL
2653        0,
2654 #else
2655        0,
2656 #endif
2657        0,
2658        0,
2659 /*29*/ MatSetUpPreallocation_MPIAIJ,
2660 #ifdef PETSC_HAVE_PBGL
2661        0,
2662 #else
2663        0,
2664 #endif
2665        0,
2666        0,
2667        0,
2668 /*34*/ MatDuplicate_MPIAIJ,
2669        0,
2670        0,
2671        0,
2672        0,
2673 /*39*/ MatAXPY_MPIAIJ,
2674        MatGetSubMatrices_MPIAIJ,
2675        MatIncreaseOverlap_MPIAIJ,
2676        MatGetValues_MPIAIJ,
2677        MatCopy_MPIAIJ,
2678 /*44*/ MatGetRowMax_MPIAIJ,
2679        MatScale_MPIAIJ,
2680        0,
2681        0,
2682        0,
2683 /*49*/ MatSetBlockSize_MPIAIJ,
2684        0,
2685        0,
2686        0,
2687        0,
2688 /*54*/ MatFDColoringCreate_MPIAIJ,
2689        0,
2690        MatSetUnfactored_MPIAIJ,
2691        MatPermute_MPIAIJ,
2692        0,
2693 /*59*/ MatGetSubMatrix_MPIAIJ,
2694        MatDestroy_MPIAIJ,
2695        MatView_MPIAIJ,
2696        0,
2697        0,
2698 /*64*/ 0,
2699        0,
2700        0,
2701        0,
2702        0,
2703 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2704        MatGetRowMinAbs_MPIAIJ,
2705        0,
2706        MatSetColoring_MPIAIJ,
2707 #if defined(PETSC_HAVE_ADIC)
2708        MatSetValuesAdic_MPIAIJ,
2709 #else
2710        0,
2711 #endif
2712        MatSetValuesAdifor_MPIAIJ,
2713 /*75*/ MatFDColoringApply_AIJ,
2714        0,
2715        0,
2716        0,
2717        0,
2718 /*80*/ 0,
2719        0,
2720        0,
2721 /*83*/ MatLoad_MPIAIJ,
2722        0,
2723        0,
2724        0,
2725        0,
2726        0,
2727 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2728        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2729        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2730        MatPtAP_Basic,
2731        MatPtAPSymbolic_MPIAIJ,
2732 /*94*/ MatPtAPNumeric_MPIAIJ,
2733        0,
2734        0,
2735        0,
2736        0,
2737 /*99*/ 0,
2738        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2739        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2740        MatConjugate_MPIAIJ,
2741        0,
2742 /*104*/MatSetValuesRow_MPIAIJ,
2743        MatRealPart_MPIAIJ,
2744        MatImaginaryPart_MPIAIJ,
2745        0,
2746        0,
2747 /*109*/0,
2748        MatGetRedundantMatrix_MPIAIJ,
2749        MatGetRowMin_MPIAIJ,
2750        0,
2751        0,
2752 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2753        0,
2754        0,
2755        0,
2756        0,
2757 /*119*/0,
2758        0,
2759        0,
2760        0,
2761        MatGetMultiProcBlock_MPIAIJ
2762 };
2763 
2764 /* ----------------------------------------------------------------------------------------*/
2765 
2766 EXTERN_C_BEGIN
2767 #undef __FUNCT__
2768 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2769 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2770 {
2771   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2772   PetscErrorCode ierr;
2773 
2774   PetscFunctionBegin;
2775   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2776   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2777   PetscFunctionReturn(0);
2778 }
2779 EXTERN_C_END
2780 
2781 EXTERN_C_BEGIN
2782 #undef __FUNCT__
2783 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2784 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2785 {
2786   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2787   PetscErrorCode ierr;
2788 
2789   PetscFunctionBegin;
2790   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2791   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2792   PetscFunctionReturn(0);
2793 }
2794 EXTERN_C_END
2795 
2796 EXTERN_C_BEGIN
2797 #undef __FUNCT__
2798 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2799 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2800 {
2801   Mat_MPIAIJ     *b;
2802   PetscErrorCode ierr;
2803   PetscInt       i;
2804 
2805   PetscFunctionBegin;
2806   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2807   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2808   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2809   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2810 
2811   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2812   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2813   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2814   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2815   if (d_nnz) {
2816     for (i=0; i<B->rmap->n; i++) {
2817       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2818     }
2819   }
2820   if (o_nnz) {
2821     for (i=0; i<B->rmap->n; i++) {
2822       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2823     }
2824   }
2825   b = (Mat_MPIAIJ*)B->data;
2826 
2827   if (!B->preallocated) {
2828     /* Explicitly create 2 MATSEQAIJ matrices. */
2829     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2830     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2831     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2832     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2833     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2834     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2835     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2836     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2837   }
2838 
2839   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2840   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2841   B->preallocated = PETSC_TRUE;
2842   PetscFunctionReturn(0);
2843 }
2844 EXTERN_C_END
2845 
2846 #undef __FUNCT__
2847 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2848 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2849 {
2850   Mat            mat;
2851   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2852   PetscErrorCode ierr;
2853 
2854   PetscFunctionBegin;
2855   *newmat       = 0;
2856   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2857   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2858   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2859   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2860   a    = (Mat_MPIAIJ*)mat->data;
2861 
2862   mat->factortype    = matin->factortype;
2863   mat->rmap->bs      = matin->rmap->bs;
2864   mat->assembled    = PETSC_TRUE;
2865   mat->insertmode   = NOT_SET_VALUES;
2866   mat->preallocated = PETSC_TRUE;
2867 
2868   a->size           = oldmat->size;
2869   a->rank           = oldmat->rank;
2870   a->donotstash     = oldmat->donotstash;
2871   a->roworiented    = oldmat->roworiented;
2872   a->rowindices     = 0;
2873   a->rowvalues      = 0;
2874   a->getrowactive   = PETSC_FALSE;
2875 
2876   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2877   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2878 
2879   if (oldmat->colmap) {
2880 #if defined (PETSC_USE_CTABLE)
2881     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2882 #else
2883     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2884     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2885     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2886 #endif
2887   } else a->colmap = 0;
2888   if (oldmat->garray) {
2889     PetscInt len;
2890     len  = oldmat->B->cmap->n;
2891     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2892     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2893     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2894   } else a->garray = 0;
2895 
2896   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2897   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2898   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2899   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2900   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2901   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2902   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2903   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2904   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2905   *newmat = mat;
2906   PetscFunctionReturn(0);
2907 }
2908 
2909 /*
2910     Allows sending/receiving messages larger than 2 gigabytes in a single call
2911 */
2912 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
2913 {
2914   int             ierr;
2915   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2916   PetscInt        i,numchunks;
2917   PetscMPIInt     icnt;
2918 
2919   numchunks = cnt/CHUNKSIZE + 1;
2920   for (i=0; i<numchunks; i++) {
2921     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2922     ierr = MPI_Send(mess,icnt,type,to,tag,comm);CHKERRQ(ierr);
2923     if (type == MPIU_INT) {
2924       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2925     } else if (type == MPIU_SCALAR) {
2926       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2927     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2928   }
2929   return 0;
2930 }
2931 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
2932 {
2933   int             ierr;
2934   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2935   MPI_Status      status;
2936   PetscInt        i,numchunks;
2937   PetscMPIInt     icnt;
2938 
2939   numchunks = cnt/CHUNKSIZE + 1;
2940   for (i=0; i<numchunks; i++) {
2941     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2942     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);CHKERRQ(ierr);
2943     if (type == MPIU_INT) {
2944       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2945     } else if (type == MPIU_SCALAR) {
2946       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2947     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2948   }
2949   return 0;
2950 }
2951 
2952 #undef __FUNCT__
2953 #define __FUNCT__ "MatLoad_MPIAIJ"
2954 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2955 {
2956   PetscScalar    *vals,*svals;
2957   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2958   PetscErrorCode ierr;
2959   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2960   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2961   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2962   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2963   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2964   int            fd;
2965 
2966   PetscFunctionBegin;
2967   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2968   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2969   if (!rank) {
2970     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2971     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2972     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2973   }
2974 
2975   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2976 
2977   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2978   M = header[1]; N = header[2];
2979   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2980   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2981   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2982 
2983   /* If global sizes are set, check if they are consistent with that given in the file */
2984   if (sizesset) {
2985     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2986   }
2987   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2988   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2989 
2990   /* determine ownership of all rows */
2991   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
2992   else m = newMat->rmap->n; /* Set by user */
2993 
2994   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2995   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2996 
2997   /* First process needs enough room for process with most rows */
2998   if (!rank) {
2999     mmax       = rowners[1];
3000     for (i=2; i<=size; i++) {
3001       mmax = PetscMax(mmax,rowners[i]);
3002     }
3003   } else mmax = m;
3004 
3005   rowners[0] = 0;
3006   for (i=2; i<=size; i++) {
3007     rowners[i] += rowners[i-1];
3008   }
3009   rstart = rowners[rank];
3010   rend   = rowners[rank+1];
3011 
3012   /* distribute row lengths to all processors */
3013   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3014   if (!rank) {
3015     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3016     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3017     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3018     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3019     for (j=0; j<m; j++) {
3020       procsnz[0] += ourlens[j];
3021     }
3022     for (i=1; i<size; i++) {
3023       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3024       /* calculate the number of nonzeros on each processor */
3025       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3026         procsnz[i] += rowlengths[j];
3027       }
3028       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3029     }
3030     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3031   } else {
3032     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3033   }
3034 
3035   if (!rank) {
3036     /* determine max buffer needed and allocate it */
3037     maxnz = 0;
3038     for (i=0; i<size; i++) {
3039       maxnz = PetscMax(maxnz,procsnz[i]);
3040     }
3041     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3042 
3043     /* read in my part of the matrix column indices  */
3044     nz   = procsnz[0];
3045     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3046     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3047 
3048     /* read in everyone else's part and ship it off */
3049     for (i=1; i<size; i++) {
3050       nz     = procsnz[i];
3051       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3052       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3053     }
3054     ierr = PetscFree(cols);CHKERRQ(ierr);
3055   } else {
3056     /* determine buffer space needed for message */
3057     nz = 0;
3058     for (i=0; i<m; i++) {
3059       nz += ourlens[i];
3060     }
3061     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3062 
3063     /* receive message of column indices*/
3064     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3065   }
3066 
3067   /* determine column ownership if matrix is not square */
3068   if (N != M) {
3069     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3070     else n = newMat->cmap->n;
3071     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3072     cstart = cend - n;
3073   } else {
3074     cstart = rstart;
3075     cend   = rend;
3076     n      = cend - cstart;
3077   }
3078 
3079   /* loop over local rows, determining number of off diagonal entries */
3080   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3081   jj = 0;
3082   for (i=0; i<m; i++) {
3083     for (j=0; j<ourlens[i]; j++) {
3084       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3085       jj++;
3086     }
3087   }
3088 
3089   for (i=0; i<m; i++) {
3090     ourlens[i] -= offlens[i];
3091   }
3092   if (!sizesset) {
3093     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3094   }
3095   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3096 
3097   for (i=0; i<m; i++) {
3098     ourlens[i] += offlens[i];
3099   }
3100 
3101   if (!rank) {
3102     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3103 
3104     /* read in my part of the matrix numerical values  */
3105     nz   = procsnz[0];
3106     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3107 
3108     /* insert into matrix */
3109     jj      = rstart;
3110     smycols = mycols;
3111     svals   = vals;
3112     for (i=0; i<m; i++) {
3113       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3114       smycols += ourlens[i];
3115       svals   += ourlens[i];
3116       jj++;
3117     }
3118 
3119     /* read in other processors and ship out */
3120     for (i=1; i<size; i++) {
3121       nz     = procsnz[i];
3122       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3123       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3124     }
3125     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3126   } else {
3127     /* receive numeric values */
3128     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3129 
3130     /* receive message of values*/
3131     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3132 
3133     /* insert into matrix */
3134     jj      = rstart;
3135     smycols = mycols;
3136     svals   = vals;
3137     for (i=0; i<m; i++) {
3138       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3139       smycols += ourlens[i];
3140       svals   += ourlens[i];
3141       jj++;
3142     }
3143   }
3144   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3145   ierr = PetscFree(vals);CHKERRQ(ierr);
3146   ierr = PetscFree(mycols);CHKERRQ(ierr);
3147   ierr = PetscFree(rowners);CHKERRQ(ierr);
3148 
3149   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3150   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3151   PetscFunctionReturn(0);
3152 }
3153 
3154 #undef __FUNCT__
3155 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3156 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3157 {
3158   PetscErrorCode ierr;
3159   IS             iscol_local;
3160   PetscInt       csize;
3161 
3162   PetscFunctionBegin;
3163   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3164   if (call == MAT_REUSE_MATRIX) {
3165     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3166     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3167   } else {
3168     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3169   }
3170   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3171   if (call == MAT_INITIAL_MATRIX) {
3172     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3173     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3174   }
3175   PetscFunctionReturn(0);
3176 }
3177 
3178 #undef __FUNCT__
3179 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3180 /*
3181     Not great since it makes two copies of the submatrix: first each process
3182   extracts a local SeqAIJ submatrix, and then the final result is built by
3183   concatenating those local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3184 
3185   Note: This requires a sequential iscol with all indices.
3186 */
3187 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3188 {
3189   PetscErrorCode ierr;
3190   PetscMPIInt    rank,size;
3191   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3192   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3193   Mat            *local,M,Mreuse;
3194   MatScalar      *vwork,*aa;
3195   MPI_Comm       comm = ((PetscObject)mat)->comm;
3196   Mat_SeqAIJ     *aij;
3197 
3198 
3199   PetscFunctionBegin;
3200   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3201   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3202 
3203   if (call ==  MAT_REUSE_MATRIX) {
3204     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3205     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3206     local = &Mreuse;
3207     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3208   } else {
3209     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3210     Mreuse = *local;
3211     ierr   = PetscFree(local);CHKERRQ(ierr);
3212   }
3213 
3214   /*
3215       m - number of local rows
3216       n - number of columns (same on all processors)
3217       rstart - first row in new global matrix generated
3218   */
3219   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3220   if (call == MAT_INITIAL_MATRIX) {
3221     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3222     ii  = aij->i;
3223     jj  = aij->j;
3224 
3225     /*
3226         Determine the number of non-zeros in the diagonal and off-diagonal
3227         portions of the matrix in order to do correct preallocation
3228     */
3229 
3230     /* first get start and end of "diagonal" columns */
3231     if (csize == PETSC_DECIDE) {
3232       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3233       if (mglobal == n) { /* square matrix */
3234 	nlocal = m;
3235       } else {
3236         nlocal = n/size + ((n % size) > rank);
3237       }
3238     } else {
3239       nlocal = csize;
3240     }
3241     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3242     rstart = rend - nlocal;
3243     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3244 
3245     /* next, compute all the lengths */
3246     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3247     olens = dlens + m;
3248     for (i=0; i<m; i++) {
3249       jend = ii[i+1] - ii[i];
3250       olen = 0;
3251       dlen = 0;
3252       for (j=0; j<jend; j++) {
3253         if (*jj < rstart || *jj >= rend) olen++;
3254         else dlen++;
3255         jj++;
3256       }
3257       olens[i] = olen;
3258       dlens[i] = dlen;
3259     }
3260     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3261     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3262     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3263     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3264     ierr = PetscFree(dlens);CHKERRQ(ierr);
3265   } else {
3266     PetscInt ml,nl;
3267 
3268     M = *newmat;
3269     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3270     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3271     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3272     /*
3273          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3274        rather than the slower MatSetValues().
3275     */
3276     M->was_assembled = PETSC_TRUE;
3277     M->assembled     = PETSC_FALSE;
3278   }
3279   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3280   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3281   ii  = aij->i;
3282   jj  = aij->j;
3283   aa  = aij->a;
3284   for (i=0; i<m; i++) {
3285     row   = rstart + i;
3286     nz    = ii[i+1] - ii[i];
3287     cwork = jj;     jj += nz;
3288     vwork = aa;     aa += nz;
3289     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3290   }
3291 
3292   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3293   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3294   *newmat = M;
3295 
3296   /* save submatrix used in processor for next request */
3297   if (call ==  MAT_INITIAL_MATRIX) {
3298     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3299     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3300   }
3301 
3302   PetscFunctionReturn(0);
3303 }
3304 
3305 EXTERN_C_BEGIN
3306 #undef __FUNCT__
3307 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3308 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3309 {
3310   PetscInt       m,cstart, cend,j,nnz,i,d;
3311   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3312   const PetscInt *JJ;
3313   PetscScalar    *values;
3314   PetscErrorCode ierr;
3315 
3316   PetscFunctionBegin;
3317   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3318 
3319   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3320   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3321   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3322   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3323   m      = B->rmap->n;
3324   cstart = B->cmap->rstart;
3325   cend   = B->cmap->rend;
3326   rstart = B->rmap->rstart;
3327 
3328   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3329 
3330 #if defined(PETSC_USE_DEBUG)
3331   for (i=0; i<m; i++) {
3332     nnz     = Ii[i+1]- Ii[i];
3333     JJ      = J + Ii[i];
3334     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3335     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3336     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3337   }
3338 #endif
3339 
3340   for (i=0; i<m; i++) {
3341     nnz     = Ii[i+1]- Ii[i];
3342     JJ      = J + Ii[i];
3343     nnz_max = PetscMax(nnz_max,nnz);
3344     d       = 0;
3345     for (j=0; j<nnz; j++) {
3346       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3347     }
3348     d_nnz[i] = d;
3349     o_nnz[i] = nnz - d;
3350   }
3351   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3352   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3353 
3354   if (v) values = (PetscScalar*)v;
3355   else {
3356     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3357     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3358   }
3359 
3360   for (i=0; i<m; i++) {
3361     ii   = i + rstart;
3362     nnz  = Ii[i+1]- Ii[i];
3363     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3364   }
3365   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3366   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3367 
3368   if (!v) {
3369     ierr = PetscFree(values);CHKERRQ(ierr);
3370   }
3371   PetscFunctionReturn(0);
3372 }
3373 EXTERN_C_END
3374 
3375 #undef __FUNCT__
3376 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3377 /*@
3378    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3379    (the default parallel PETSc format).
3380 
3381    Collective on MPI_Comm
3382 
3383    Input Parameters:
3384 +  B - the matrix
3385 .  i - the indices into j for the start of each local row (starts with zero)
3386 .  j - the column indices for each local row (starts with zero)
3387 -  v - optional values in the matrix
3388 
3389    Level: developer
3390 
3391    Notes:
3392        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3393      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3394      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3395 
3396        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3397 
3398        The format which is used for the sparse matrix input is equivalent to a
3399     row-major ordering, i.e. for the following matrix, the input data expected is
3400     as shown:
3401 
3402         1 0 0
3403         2 0 3     P0
3404        -------
3405         4 5 6     P1
3406 
3407      Process0 [P0]: rows_owned=[0,1]
3408         i =  {0,1,3}  [size = nrow+1  = 2+1]
3409         j =  {0,0,2}  [size = nz = 3]
3410         v =  {1,2,3}  [size = nz = 3]
3411 
3412      Process1 [P1]: rows_owned=[2]
3413         i =  {0,3}    [size = nrow+1  = 1+1]
3414         j =  {0,1,2}  [size = nz = 3]
3415         v =  {4,5,6}  [size = nz = 3]
3416 
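     A minimal call sequence is sketched below (illustrative only; error checking
     is omitted, and comm, m, n, i, j, and v stand for the communicator, the local
     sizes, and the per-process CSR arrays shown above):

.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(A,i,j,v);
.ve
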
3417 .keywords: matrix, aij, compressed row, sparse, parallel
3418 
3419 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3420           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3421 @*/
3422 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3423 {
3424   PetscErrorCode ierr;
3425 
3426   PetscFunctionBegin;
3427   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3428   PetscFunctionReturn(0);
3429 }
3430 
3431 #undef __FUNCT__
3432 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3433 /*@C
3434    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3435    (the default parallel PETSc format).  For good matrix assembly performance
3436    the user should preallocate the matrix storage by setting the parameters
3437    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3438    performance can be increased by more than a factor of 50.
3439 
3440    Collective on MPI_Comm
3441 
3442    Input Parameters:
3443 +  A - the matrix
3444 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3445            (same value is used for all local rows)
3446 .  d_nnz - array containing the number of nonzeros in the various rows of the
3447            DIAGONAL portion of the local submatrix (possibly different for each row)
3448            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3449            The size of this array is equal to the number of local rows, i.e 'm'.
3450            You must leave room for the diagonal entry even if it is zero.
3451 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3452            submatrix (same value is used for all local rows).
3453 -  o_nnz - array containing the number of nonzeros in the various rows of the
3454            OFF-DIAGONAL portion of the local submatrix (possibly different for
3455            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3456            structure. The size of this array is equal to the number
3457            of local rows, i.e 'm'.
3458 
3459    If the *_nnz parameter is given then the *_nz parameter is ignored
3460 
3461    The AIJ format (also called the Yale sparse matrix format or
3462    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3463    storage.  The stored row and column indices begin with zero.
3464    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3465 
3466    The parallel matrix is partitioned such that the first m0 rows belong to
3467    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3468    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3469 
3470    The DIAGONAL portion of the local submatrix of a processor can be defined
3471    as the submatrix which is obtained by extracting the part corresponding to
3472    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3473    first row that belongs to the processor, r2 is the last row belonging to
3474    this processor, and c1-c2 is the range of indices of the local part of a
3475    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3476    common case of a square matrix, the row and column ranges are the same and
3477    the DIAGONAL part is also square. The remaining portion of the local
3478    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3479 
3480    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3481 
3482    You can call MatGetInfo() to get information on how effective the preallocation was;
3483    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3484    You can also run with the option -info and look for messages with the string
3485    malloc in them to see if additional memory allocation was needed.
3486 
3487    Example usage:
3488 
3489    Consider the following 8x8 matrix with 34 non-zero values, that is
3490    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3491    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3492    as follows:
3493 
3494 .vb
3495             1  2  0  |  0  3  0  |  0  4
3496     Proc0   0  5  6  |  7  0  0  |  8  0
3497             9  0 10  | 11  0  0  | 12  0
3498     -------------------------------------
3499            13  0 14  | 15 16 17  |  0  0
3500     Proc1   0 18  0  | 19 20 21  |  0  0
3501             0  0  0  | 22 23  0  | 24  0
3502     -------------------------------------
3503     Proc2  25 26 27  |  0  0 28  | 29  0
3504            30  0  0  | 31 32 33  |  0 34
3505 .ve
3506 
3507    This can be represented as a collection of submatrices as:
3508 
3509 .vb
3510       A B C
3511       D E F
3512       G H I
3513 .ve
3514 
3515    Where the submatrices A,B,C are owned by proc0, D,E,F are
3516    owned by proc1, G,H,I are owned by proc2.
3517 
3518    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3519    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3520    The 'M','N' parameters are 8,8, and have the same values on all procs.
3521 
3522    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3523    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3524    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3525    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3526    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3527    matrix, and [DF] as another SeqAIJ matrix.
3528 
3529    When d_nz, o_nz parameters are specified, d_nz storage elements are
3530    allocated for every row of the local diagonal submatrix, and o_nz
3531    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3532    One way to choose d_nz and o_nz is to use the max nonzeros per local
3533    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3534    In this case, the values of d_nz,o_nz are:
3535 .vb
3536      proc0 : dnz = 2, o_nz = 2
3537      proc1 : dnz = 3, o_nz = 2
3538      proc2 : dnz = 1, o_nz = 4
3539 .ve
3540    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3541    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3542    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3543    34 values.
3544 
3545    When d_nnz, o_nnz parameters are specified, the storage is specified
3546    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3547    In the above case the values for d_nnz,o_nnz are:
3548 .vb
3549      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3550      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3551      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3552 .ve
3553    Here the space allocated is the sum of all the above values, i.e. 34, and
3554    hence pre-allocation is perfect.
3555 
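   For example, on proc0 the corresponding call is sketched below (illustrative
   only; the matrix mat is assumed to have already been created with MatCreate(),
   MatSetSizes() and MatSetType(mat,MATMPIAIJ), and the other processes make the
   matching call with their own d_nnz/o_nnz values from the list above):

.vb
     PetscInt d_nnz[3] = {2,2,2};
     PetscInt o_nnz[3] = {2,2,2};
     MatMPIAIJSetPreallocation(mat,0,d_nnz,0,o_nnz);
.ve
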
3556    Level: intermediate
3557 
3558 .keywords: matrix, aij, compressed row, sparse, parallel
3559 
3560 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3561           MPIAIJ, MatGetInfo()
3562 @*/
3563 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3564 {
3565   PetscErrorCode ierr;
3566 
3567   PetscFunctionBegin;
3568   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3569   PetscFunctionReturn(0);
3570 }
3571 
3572 #undef __FUNCT__
3573 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3574 /*@
3575      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3576          CSR format the local rows.
3577 
3578    Collective on MPI_Comm
3579 
3580    Input Parameters:
3581 +  comm - MPI communicator
3582 .  m - number of local rows (Cannot be PETSC_DECIDE)
3583 .  n - This value should be the same as the local size used in creating the
3584        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3585        calculated if N is given) For square matrices n is almost always m.
3586 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3587 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3588 .   i - row indices
3589 .   j - column indices
3590 -   a - matrix values
3591 
3592    Output Parameter:
3593 .   mat - the matrix
3594 
3595    Level: intermediate
3596 
3597    Notes:
3598        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3599      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3600      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3601 
3602        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3603 
3604        The format which is used for the sparse matrix input is equivalent to a
3605     row-major ordering, i.e. for the following matrix, the input data expected is
3606     as shown:
3607 
3608         1 0 0
3609         2 0 3     P0
3610        -------
3611         4 5 6     P1
3612 
3613      Process0 [P0]: rows_owned=[0,1]
3614         i =  {0,1,3}  [size = nrow+1  = 2+1]
3615         j =  {0,0,2}  [size = nz = 6]
3616         v =  {1,2,3}  [size = nz = 6]
3617 
3618      Process1 [P1]: rows_owned=[2]
3619         i =  {0,3}    [size = nrow+1  = 1+1]
3620         j =  {0,1,2}  [size = nz = 6]
3621         v =  {4,5,6}  [size = nz = 6]
3622 
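     A sketch of the corresponding call on Process0 is shown below (illustrative
     only; error checking omitted, comm is the communicator, and every process
     makes the matching collective call with its own local arrays):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1,2,3};
     Mat         A;
     MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);
.ve
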
3623 .keywords: matrix, aij, compressed row, sparse, parallel
3624 
3625 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3626           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3627 @*/
3628 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3629 {
3630   PetscErrorCode ierr;
3631 
3632   PetscFunctionBegin;
3633   if (i[0]) {
3634     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3635   }
3636   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3637   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3638   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3639   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3640   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3641   PetscFunctionReturn(0);
3642 }
3643 
3644 #undef __FUNCT__
3645 #define __FUNCT__ "MatCreateMPIAIJ"
3646 /*@C
3647    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3648    (the default parallel PETSc format).  For good matrix assembly performance
3649    the user should preallocate the matrix storage by setting the parameters
3650    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3651    performance can be increased by more than a factor of 50.
3652 
3653    Collective on MPI_Comm
3654 
3655    Input Parameters:
3656 +  comm - MPI communicator
3657 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3658            This value should be the same as the local size used in creating the
3659            y vector for the matrix-vector product y = Ax.
3660 .  n - This value should be the same as the local size used in creating the
3661        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3662        calculated if N is given) For square matrices n is almost always m.
3663 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3664 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3665 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3666            (same value is used for all local rows)
3667 .  d_nnz - array containing the number of nonzeros in the various rows of the
3668            DIAGONAL portion of the local submatrix (possibly different for each row)
3669            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3670            The size of this array is equal to the number of local rows, i.e 'm'.
3671            You must leave room for the diagonal entry even if it is zero.
3672 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3673            submatrix (same value is used for all local rows).
3674 -  o_nnz - array containing the number of nonzeros in the various rows of the
3675            OFF-DIAGONAL portion of the local submatrix (possibly different for
3676            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3677            structure. The size of this array is equal to the number
3678            of local rows, i.e 'm'.
3679 
3680    Output Parameter:
3681 .  A - the matrix
3682 
3683    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3684    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3685    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3686 
3687    Notes:
3688    If the *_nnz parameter is given then the *_nz parameter is ignored
3689 
3690    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3691    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3692    storage requirements for this matrix.
3693 
3694    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3695    processor than it must be used on all processors that share the object for
3696    that argument.
3697 
3698    The user MUST specify either the local or global matrix dimensions
3699    (possibly both).
3700 
3701    The parallel matrix is partitioned across processors such that the
3702    first m0 rows belong to process 0, the next m1 rows belong to
3703    process 1, the next m2 rows belong to process 2 etc.. where
3704    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3705    values corresponding to an [m x N] submatrix.
3706 
3707    The columns are logically partitioned with the n0 columns belonging
3708    to 0th partition, the next n1 columns belonging to the next
3709    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3710 
3711    The DIAGONAL portion of the local submatrix on any given processor
3712    is the submatrix corresponding to the rows and columns m,n
3713    corresponding to the given processor. i.e diagonal matrix on
3714    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3715    etc. The remaining portion of the local submatrix [m x (N-n)]
3716    constitute the OFF-DIAGONAL portion. The example below better
3717    illustrates this concept.
3718 
3719    For a square global matrix we define each processor's diagonal portion
3720    to be its local rows and the corresponding columns (a square submatrix);
3721    each processor's off-diagonal portion encompasses the remainder of the
3722    local matrix (a rectangular submatrix).
3723 
3724    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3725 
3726    When calling this routine with a single process communicator, a matrix of
3727    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3728    type of communicator, use the construction mechanism:
3729      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3730 
3731    By default, this format uses inodes (identical nodes) when possible.
3732    We search for consecutive rows with the same nonzero structure, thereby
3733    reusing matrix information to achieve increased efficiency.
3734 
3735    Options Database Keys:
3736 +  -mat_no_inode  - Do not use inodes
3737 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3738 -  -mat_aij_oneindex - Internally use indexing starting at 1
3739         rather than 0.  Note that when calling MatSetValues(),
3740         the user still MUST index entries starting at 0!
3741 
3742 
3743    Example usage:
3744 
3745    Consider the following 8x8 matrix with 34 non-zero values, that is
3746    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3747    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3748    as follows:
3749 
3750 .vb
3751             1  2  0  |  0  3  0  |  0  4
3752     Proc0   0  5  6  |  7  0  0  |  8  0
3753             9  0 10  | 11  0  0  | 12  0
3754     -------------------------------------
3755            13  0 14  | 15 16 17  |  0  0
3756     Proc1   0 18  0  | 19 20 21  |  0  0
3757             0  0  0  | 22 23  0  | 24  0
3758     -------------------------------------
3759     Proc2  25 26 27  |  0  0 28  | 29  0
3760            30  0  0  | 31 32 33  |  0 34
3761 .ve
3762 
3763    This can be represented as a collection of submatrices as:
3764 
3765 .vb
3766       A B C
3767       D E F
3768       G H I
3769 .ve
3770 
3771    Where the submatrices A,B,C are owned by proc0, D,E,F are
3772    owned by proc1, G,H,I are owned by proc2.
3773 
3774    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3775    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3776    The 'M','N' parameters are 8,8, and have the same values on all procs.
3777 
3778    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3779    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3780    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3781    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3782    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3783    matrix, and [DF] as another SeqAIJ matrix.
3784 
3785    When d_nz, o_nz parameters are specified, d_nz storage elements are
3786    allocated for every row of the local diagonal submatrix, and o_nz
3787    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3788    One way to choose d_nz and o_nz is to use the max nonzeros per local
3789    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3790    In this case, the values of d_nz,o_nz are:
3791 .vb
3792      proc0 : dnz = 2, o_nz = 2
3793      proc1 : dnz = 3, o_nz = 2
3794      proc2 : dnz = 1, o_nz = 4
3795 .ve
3796    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3797    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3798    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3799    34 values.
3800 
3801    When d_nnz, o_nnz parameters are specified, the storage is specified
3802    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3803    In the above case the values for d_nnz,o_nnz are:
3804 .vb
3805      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3806      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3807      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3808 .ve
3809    Here the space allocated is the sum of all the above values, i.e. 34, and
3810    hence pre-allocation is perfect.
3811 
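   The corresponding creation call is sketched below (illustrative only; shown
   for proc0 with the arrays above, error checking omitted, comm is the
   communicator shared by the three processes, and proc1/proc2 pass their own
   m, n, d_nnz and o_nnz in the same collective call):

.vb
     PetscInt d_nnz[3] = {2,2,2};
     PetscInt o_nnz[3] = {2,2,2};
     Mat      A;
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
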
3812    Level: intermediate
3813 
3814 .keywords: matrix, aij, compressed row, sparse, parallel
3815 
3816 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3817           MPIAIJ, MatCreateMPIAIJWithArrays()
3818 @*/
3819 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3820 {
3821   PetscErrorCode ierr;
3822   PetscMPIInt    size;
3823 
3824   PetscFunctionBegin;
3825   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3826   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3827   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3828   if (size > 1) {
3829     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3830     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3831   } else {
3832     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3833     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3834   }
3835   PetscFunctionReturn(0);
3836 }
3837 
3838 #undef __FUNCT__
3839 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3840 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3841 {
3842   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3843 
3844   PetscFunctionBegin;
3845   *Ad     = a->A;
3846   *Ao     = a->B;
3847   *colmap = a->garray;
3848   PetscFunctionReturn(0);
3849 }
3850 
3851 #undef __FUNCT__
3852 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3853 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3854 {
3855   PetscErrorCode ierr;
3856   PetscInt       i;
3857   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3858 
3859   PetscFunctionBegin;
3860   if (coloring->ctype == IS_COLORING_GLOBAL) {
3861     ISColoringValue *allcolors,*colors;
3862     ISColoring      ocoloring;
3863 
3864     /* set coloring for diagonal portion */
3865     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3866 
3867     /* set coloring for off-diagonal portion */
3868     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3869     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3870     for (i=0; i<a->B->cmap->n; i++) {
3871       colors[i] = allcolors[a->garray[i]];
3872     }
3873     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3874     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3875     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3876     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3877   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3878     ISColoringValue *colors;
3879     PetscInt        *larray;
3880     ISColoring      ocoloring;
3881 
3882     /* set coloring for diagonal portion */
3883     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3884     for (i=0; i<a->A->cmap->n; i++) {
3885       larray[i] = i + A->cmap->rstart;
3886     }
3887     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3888     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3889     for (i=0; i<a->A->cmap->n; i++) {
3890       colors[i] = coloring->colors[larray[i]];
3891     }
3892     ierr = PetscFree(larray);CHKERRQ(ierr);
3893     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3894     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3895     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3896 
3897     /* set coloring for off-diagonal portion */
3898     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3899     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3900     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3901     for (i=0; i<a->B->cmap->n; i++) {
3902       colors[i] = coloring->colors[larray[i]];
3903     }
3904     ierr = PetscFree(larray);CHKERRQ(ierr);
3905     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3906     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3907     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3908   } else {
3909     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3910   }
3911 
3912   PetscFunctionReturn(0);
3913 }
3914 
3915 #if defined(PETSC_HAVE_ADIC)
3916 #undef __FUNCT__
3917 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3918 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3919 {
3920   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3921   PetscErrorCode ierr;
3922 
3923   PetscFunctionBegin;
3924   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3925   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3926   PetscFunctionReturn(0);
3927 }
3928 #endif
3929 
3930 #undef __FUNCT__
3931 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3932 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3933 {
3934   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3935   PetscErrorCode ierr;
3936 
3937   PetscFunctionBegin;
3938   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3939   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3940   PetscFunctionReturn(0);
3941 }
3942 
3943 #undef __FUNCT__
3944 #define __FUNCT__ "MatMerge"
3945 /*@
3946       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3947                  matrices from each processor
3948 
3949     Collective on MPI_Comm
3950 
3951    Input Parameters:
3952 +    comm - the communicator the parallel matrix will live on
3953 .    inmat - the input sequential matrix (one per process)
3954 .    n - number of local columns (or PETSC_DECIDE)
3955 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3956 
3957    Output Parameter:
3958 .    outmat - the parallel matrix generated
3959 
3960     Level: advanced
3961 
3962    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3963 
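   Example of usage (a sketch; seqmat is a sequential matrix already assembled
   on each process, error checking is omitted, and note that the input matrix
   is destroyed by this routine):

.vb
     Mat merged;
     MatMerge(comm,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);
.ve
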
3964 @*/
3965 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3966 {
3967   PetscErrorCode ierr;
3968   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3969   PetscInt       *indx;
3970   PetscScalar    *values;
3971 
3972   PetscFunctionBegin;
3973   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3974   if (scall == MAT_INITIAL_MATRIX){
3975     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3976     if (n == PETSC_DECIDE){
3977       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3978     }
3979     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3980     rstart -= m;
3981 
3982     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3983     for (i=0;i<m;i++) {
3984       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3985       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3986       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3987     }
3988     /* This routine will ONLY return MPIAIJ type matrix */
3989     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3990     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3991     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3992     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3993     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3994 
3995   } else if (scall == MAT_REUSE_MATRIX){
3996     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3997   } else {
3998     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3999   }
4000 
4001   for (i=0;i<m;i++) {
4002     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4003     Ii    = i + rstart;
4004     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4005     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4006   }
4007   ierr = MatDestroy(inmat);CHKERRQ(ierr);
4008   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4009   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4010 
4011   PetscFunctionReturn(0);
4012 }
4013 
4014 #undef __FUNCT__
4015 #define __FUNCT__ "MatFileSplit"
4016 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4017 {
4018   PetscErrorCode    ierr;
4019   PetscMPIInt       rank;
4020   PetscInt          m,N,i,rstart,nnz;
4021   size_t            len;
4022   const PetscInt    *indx;
4023   PetscViewer       out;
4024   char              *name;
4025   Mat               B;
4026   const PetscScalar *values;
4027 
4028   PetscFunctionBegin;
4029   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4030   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4031   /* Should this be the type of the diagonal block of A? */
4032   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4033   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4034   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4035   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4036   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4037   for (i=0;i<m;i++) {
4038     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4039     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4040     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4041   }
4042   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4043   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4044 
4045   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4046   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4047   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4048   sprintf(name,"%s.%d",outfile,rank);
4049   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4050   ierr = PetscFree(name);CHKERRQ(ierr);
4051   ierr = MatView(B,out);CHKERRQ(ierr);
4052   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4053   ierr = MatDestroy(B);CHKERRQ(ierr);
4054   PetscFunctionReturn(0);
4055 }
4056 
4057 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4058 #undef __FUNCT__
4059 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4060 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4061 {
4062   PetscErrorCode       ierr;
4063   Mat_Merge_SeqsToMPI  *merge;
4064   PetscContainer       container;
4065 
4066   PetscFunctionBegin;
4067   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4068   if (container) {
4069     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4070     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4071     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4072     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4073     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4074     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4075     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4076     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4077     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4078     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4079     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4080     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4081     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4082     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4083 
4084     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4085     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4086     ierr = PetscFree(merge);CHKERRQ(ierr);
4087   }
4088 
4089   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4090   PetscFunctionReturn(0);
4091 }
4092 
4093 #include "../src/mat/utils/freespace.h"
4094 #include "petscbt.h"
4095 
4096 #undef __FUNCT__
4097 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4098 /*@C
4099       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
4100                  matrices from each processor
4101 
4102     Collective on MPI_Comm
4103 
4104    Input Parameters:
4105 +    comm - the communicator the parallel matrix will live on
4106 .    seqmat - the input sequential matrix (each process provides its own)
4107 .    m - number of local rows (or PETSC_DECIDE)
4108 .    n - number of local columns (or PETSC_DECIDE)
4109 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4110 
4111    Output Parameter:
4112 .    mpimat - the parallel matrix generated
4113 
4114     Level: advanced
4115 
4116    Notes:
4117      The dimensions of the sequential matrix in each processor MUST be the same.
4118      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4119      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4120 @*/
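/*
   A minimal usage sketch for the driver MatMerge_SeqsToMPI() defined later in this file (illustrative
   only; seqC, C, mlocal and nlocal are made-up names).  Every process supplies a SeqAIJ matrix of the
   same global size M x N and the entries are summed into one MPIAIJ matrix; a second call with
   MAT_REUSE_MATRIX reuses the symbolic data attached to C:

       Mat seqC,C;
       ... build and assemble the M x N SeqAIJ matrix seqC on every process ...
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqC,mlocal,nlocal,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
       ... change the numerical values of seqC, keeping its nonzero pattern ...
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqC,mlocal,nlocal,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/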
4121 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4122 {
4123   PetscErrorCode       ierr;
4124   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4125   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4126   PetscMPIInt          size,rank,taga,*len_s;
4127   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4128   PetscInt             proc,m;
4129   PetscInt             **buf_ri,**buf_rj;
4130   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4131   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4132   MPI_Request          *s_waits,*r_waits;
4133   MPI_Status           *status;
4134   MatScalar            *aa=a->a;
4135   MatScalar            **abuf_r,*ba_i;
4136   Mat_Merge_SeqsToMPI  *merge;
4137   PetscContainer       container;
4138 
4139   PetscFunctionBegin;
4140   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4141 
4142   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4143   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4144 
4145   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4146   if (container) {
4147     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4148   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat was not created by MatMerge_SeqsToMPISymbolic()");
4149   bi     = merge->bi;
4150   bj     = merge->bj;
4151   buf_ri = merge->buf_ri;
4152   buf_rj = merge->buf_rj;
4153 
4154   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4155   owners = merge->rowmap->range;
4156   len_s  = merge->len_s;
4157 
4158   /* send and recv matrix values */
4159   /*-----------------------------*/
4160   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4161   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4162 
4163   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4164   for (proc=0,k=0; proc<size; proc++){
4165     if (!len_s[proc]) continue;
4166     i = owners[proc];
4167     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4168     k++;
4169   }
4170 
4171   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4172   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4173   ierr = PetscFree(status);CHKERRQ(ierr);
4174 
4175   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4176   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4177 
4178   /* insert mat values of mpimat */
4179   /*----------------------------*/
4180   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4181   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4182 
4183   for (k=0; k<merge->nrecv; k++){
4184     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4185     nrows = *(buf_ri_k[k]);
4186     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4187     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4188   }
4189 
4190   /* set values of ba */
4191   m = merge->rowmap->n;
4192   for (i=0; i<m; i++) {
4193     arow = owners[rank] + i;
4194     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4195     bnzi = bi[i+1] - bi[i];
4196     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4197 
4198     /* add local non-zero vals of this proc's seqmat into ba */
4199     anzi = ai[arow+1] - ai[arow];
4200     aj   = a->j + ai[arow];
4201     aa   = a->a + ai[arow];
4202     nextaj = 0;
4203     for (j=0; nextaj<anzi; j++){
4204       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4205         ba_i[j] += aa[nextaj++];
4206       }
4207     }
4208 
4209     /* add received vals into ba */
4210     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4211       /* i-th row */
4212       if (i == *nextrow[k]) {
4213         anzi = *(nextai[k]+1) - *nextai[k];
4214         aj   = buf_rj[k] + *(nextai[k]);
4215         aa   = abuf_r[k] + *(nextai[k]);
4216         nextaj = 0;
4217         for (j=0; nextaj<anzi; j++){
4218           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4219             ba_i[j] += aa[nextaj++];
4220           }
4221         }
4222         nextrow[k]++; nextai[k]++;
4223       }
4224     }
4225     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4226   }
4227   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4228   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4229 
4230   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4231   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4232   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4233   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4234   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4235   PetscFunctionReturn(0);
4236 }
4237 
4238 #undef __FUNCT__
4239 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4240 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4241 {
4242   PetscErrorCode       ierr;
4243   Mat                  B_mpi;
4244   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4245   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4246   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4247   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4248   PetscInt             len,proc,*dnz,*onz;
4249   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4250   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4251   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4252   MPI_Status           *status;
4253   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4254   PetscBT              lnkbt;
4255   Mat_Merge_SeqsToMPI  *merge;
4256   PetscContainer       container;
4257 
4258   PetscFunctionBegin;
4259   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4260 
4261   /* make sure it is a PETSc comm */
4262   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4263   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4264   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4265 
4266   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4267   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4268 
4269   /* determine row ownership */
4270   /*---------------------------------------------------------*/
4271   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4272   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4273   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4274   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4275   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4276   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4277   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4278 
4279   m      = merge->rowmap->n;
4280   M      = merge->rowmap->N;
4281   owners = merge->rowmap->range;
4282 
4283   /* determine the number of messages to send, their lengths */
4284   /*---------------------------------------------------------*/
4285   len_s  = merge->len_s;
4286 
4287   len = 0;  /* length of buf_si[] */
4288   merge->nsend = 0;
4289   for (proc=0; proc<size; proc++){
4290     len_si[proc] = 0;
4291     if (proc == rank){
4292       len_s[proc] = 0;
4293     } else {
4294       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4295       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4296     }
4297     if (len_s[proc]) {
4298       merge->nsend++;
4299       nrows = 0;
4300       for (i=owners[proc]; i<owners[proc+1]; i++){
4301         if (ai[i+1] > ai[i]) nrows++;
4302       }
4303       len_si[proc] = 2*(nrows+1);
4304       len += len_si[proc];
4305     }
4306   }
4307 
4308   /* determine the number and length of messages to receive for ij-structure */
4309   /*-------------------------------------------------------------------------*/
4310   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4311   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4312 
4313   /* post the Irecv of j-structure */
4314   /*-------------------------------*/
4315   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4316   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4317 
4318   /* post the Isend of j-structure */
4319   /*--------------------------------*/
4320   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4321 
4322   for (proc=0, k=0; proc<size; proc++){
4323     if (!len_s[proc]) continue;
4324     i = owners[proc];
4325     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4326     k++;
4327   }
4328 
4329   /* receives and sends of j-structure are complete */
4330   /*------------------------------------------------*/
4331   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4332   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4333 
4334   /* send and recv i-structure */
4335   /*---------------------------*/
4336   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4337   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4338 
4339   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4340   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4341   for (proc=0,k=0; proc<size; proc++){
4342     if (!len_s[proc]) continue;
4343     /* form outgoing message for i-structure:
4344          buf_si[0]:                 nrows to be sent
4345                [1:nrows]:           row index (global)
4346                [nrows+1:2*nrows+1]: i-structure index
4347     */
4348     /*-------------------------------------------*/
4349     nrows = len_si[proc]/2 - 1;
4350     buf_si_i    = buf_si + nrows+1;
4351     buf_si[0]   = nrows;
4352     buf_si_i[0] = 0;
4353     nrows = 0;
4354     for (i=owners[proc]; i<owners[proc+1]; i++){
4355       anzi = ai[i+1] - ai[i];
4356       if (anzi) {
4357         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4358         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4359         nrows++;
4360       }
4361     }
4362     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4363     k++;
4364     buf_si += len_si[proc];
4365   }
4366 
4367   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4368   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4369 
4370   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4371   for (i=0; i<merge->nrecv; i++){
4372     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4373   }
4374 
4375   ierr = PetscFree(len_si);CHKERRQ(ierr);
4376   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4377   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4378   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4379   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4380   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4381   ierr = PetscFree(status);CHKERRQ(ierr);
4382 
4383   /* compute a local seq matrix in each processor */
4384   /*----------------------------------------------*/
4385   /* allocate bi array and free space for accumulating nonzero column info */
4386   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4387   bi[0] = 0;
4388 
4389   /* create and initialize a linked list */
4390   nlnk = N+1;
4391   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4392 
4393   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4394   len = 0;
4395   len  = ai[owners[rank+1]] - ai[owners[rank]];
4396   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4397   current_space = free_space;
4398 
4399   /* determine symbolic info for each local row */
4400   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4401 
4402   for (k=0; k<merge->nrecv; k++){
4403     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4404     nrows = *buf_ri_k[k];
4405     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4406     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4407   }
4408 
4409   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4410   len = 0;
4411   for (i=0;i<m;i++) {
4412     bnzi   = 0;
4413     /* add local non-zero cols of this proc's seqmat into lnk */
4414     arow   = owners[rank] + i;
4415     anzi   = ai[arow+1] - ai[arow];
4416     aj     = a->j + ai[arow];
4417     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4418     bnzi += nlnk;
4419     /* add received col data into lnk */
4420     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4421       if (i == *nextrow[k]) { /* i-th row */
4422         anzi = *(nextai[k]+1) - *nextai[k];
4423         aj   = buf_rj[k] + *nextai[k];
4424         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4425         bnzi += nlnk;
4426         nextrow[k]++; nextai[k]++;
4427       }
4428     }
4429     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4430 
4431     /* if free space is not available, make more free space */
4432     if (current_space->local_remaining<bnzi) {
4433       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4434       nspacedouble++;
4435     }
4436     /* copy data into free space, then initialize lnk */
4437     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4438     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4439 
4440     current_space->array           += bnzi;
4441     current_space->local_used      += bnzi;
4442     current_space->local_remaining -= bnzi;
4443 
4444     bi[i+1] = bi[i] + bnzi;
4445   }
4446 
4447   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4448 
4449   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4450   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4451   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4452 
4453   /* create symbolic parallel matrix B_mpi */
4454   /*---------------------------------------*/
4455   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4456   if (n==PETSC_DECIDE) {
4457     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4458   } else {
4459     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4460   }
4461   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4462   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4463   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4464 
4465   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4466   B_mpi->assembled     = PETSC_FALSE;
4467   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4468   merge->bi            = bi;
4469   merge->bj            = bj;
4470   merge->buf_ri        = buf_ri;
4471   merge->buf_rj        = buf_rj;
4472   merge->coi           = PETSC_NULL;
4473   merge->coj           = PETSC_NULL;
4474   merge->owners_co     = PETSC_NULL;
4475 
4476   /* attach the supporting struct to B_mpi for reuse */
4477   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4478   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4479   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4480   *mpimat = B_mpi;
4481 
4482   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4483   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4484   PetscFunctionReturn(0);
4485 }
4486 
4487 #undef __FUNCT__
4488 #define __FUNCT__ "MatMerge_SeqsToMPI"
4489 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4490 {
4491   PetscErrorCode   ierr;
4492 
4493   PetscFunctionBegin;
4494   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4495   if (scall == MAT_INITIAL_MATRIX){
4496     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4497   }
4498   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4499   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4500   PetscFunctionReturn(0);
4501 }
4502 
4503 #undef __FUNCT__
4504 #define __FUNCT__ "MatGetLocalMat"
4505 /*@
4506      MatGetLocalMat - Creates a SeqAIJ matrix from all the local rows of a parallel MPIAIJ matrix
4507 
4508     Not Collective
4509 
4510    Input Parameters:
4511 +    A - the matrix
4512 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4513 
4514    Output Parameter:
4515 .    A_loc - the local sequential matrix generated
4516 
4517     Level: developer
4518 
4519 @*/
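/*
   A minimal usage sketch (illustrative only; A_loc is a made-up name).  The result is a SeqAIJ matrix
   with A->rmap->n rows and A->cmap->N columns; a MAT_REUSE_MATRIX call only refreshes the numerical
   values after A has changed:

       Mat A_loc;
       ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
       ...
       ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
       ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/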
4520 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4521 {
4522   PetscErrorCode  ierr;
4523   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4524   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4525   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4526   MatScalar       *aa=a->a,*ba=b->a,*cam;
4527   PetscScalar     *ca;
4528   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4529   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4530 
4531   PetscFunctionBegin;
4532   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4533   if (scall == MAT_INITIAL_MATRIX){
4534     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4535     ci[0] = 0;
4536     for (i=0; i<am; i++){
4537       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4538     }
4539     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4540     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4541     k = 0;
4542     for (i=0; i<am; i++) {
4543       ncols_o = bi[i+1] - bi[i];
4544       ncols_d = ai[i+1] - ai[i];
4545       /* off-diagonal portion of A */
4546       for (jo=0; jo<ncols_o; jo++) {
4547         col = cmap[*bj];
4548         if (col >= cstart) break;
4549         cj[k]   = col; bj++;
4550         ca[k++] = *ba++;
4551       }
4552       /* diagonal portion of A */
4553       for (j=0; j<ncols_d; j++) {
4554         cj[k]   = cstart + *aj++;
4555         ca[k++] = *aa++;
4556       }
4557       /* off-diagonal portion of A */
4558       for (j=jo; j<ncols_o; j++) {
4559         cj[k]   = cmap[*bj++];
4560         ca[k++] = *ba++;
4561       }
4562     }
4563     /* put together the new matrix */
4564     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4565     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4566     /* Since these are PETSc arrays, change flags to free them as necessary. */
4567     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4568     mat->free_a  = PETSC_TRUE;
4569     mat->free_ij = PETSC_TRUE;
4570     mat->nonew   = 0;
4571   } else if (scall == MAT_REUSE_MATRIX){
4572     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4573     ci = mat->i; cj = mat->j; cam = mat->a;
4574     for (i=0; i<am; i++) {
4575       /* off-diagonal portion of A */
4576       ncols_o = bi[i+1] - bi[i];
4577       for (jo=0; jo<ncols_o; jo++) {
4578         col = cmap[*bj];
4579         if (col >= cstart) break;
4580         *cam++ = *ba++; bj++;
4581       }
4582       /* diagonal portion of A */
4583       ncols_d = ai[i+1] - ai[i];
4584       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4585       /* off-diagonal portion of A */
4586       for (j=jo; j<ncols_o; j++) {
4587         *cam++ = *ba++; bj++;
4588       }
4589     }
4590   } else {
4591     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4592   }
4593 
4594   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4595   PetscFunctionReturn(0);
4596 }
4597 
4598 #undef __FUNCT__
4599 #define __FUNCT__ "MatGetLocalMatCondensed"
4600 /*@C
4601      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of a parallel MPIAIJ matrix, keeping only the NON-ZERO columns
4602 
4603     Not Collective
4604 
4605    Input Parameters:
4606 +    A - the matrix
4607 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4608 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4609 
4610    Output Parameter:
4611 .    A_loc - the local sequential matrix generated
4612 
4613     Level: developer
4614 
4615 @*/
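/*
   A minimal usage sketch (illustrative only; A_loc is a made-up name).  Passing PETSC_NULL for row and
   col keeps all local rows and the locally nonzero columns of A:

       Mat A_loc;
       ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
       ...
       ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/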
4616 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4617 {
4618   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4619   PetscErrorCode    ierr;
4620   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4621   IS                isrowa,iscola;
4622   Mat               *aloc;
4623 
4624   PetscFunctionBegin;
4625   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4626   if (!row){
4627     start = A->rmap->rstart; end = A->rmap->rend;
4628     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4629   } else {
4630     isrowa = *row;
4631   }
4632   if (!col){
4633     start = A->cmap->rstart;
4634     cmap  = a->garray;
4635     nzA   = a->A->cmap->n;
4636     nzB   = a->B->cmap->n;
4637     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4638     ncols = 0;
4639     for (i=0; i<nzB; i++) {
4640       if (cmap[i] < start) idx[ncols++] = cmap[i];
4641       else break;
4642     }
4643     imark = i;
4644     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4645     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4646     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4647   } else {
4648     iscola = *col;
4649   }
4650   if (scall != MAT_INITIAL_MATRIX){
4651     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4652     aloc[0] = *A_loc;
4653   }
4654   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4655   *A_loc = aloc[0];
4656   ierr = PetscFree(aloc);CHKERRQ(ierr);
4657   if (!row){
4658     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4659   }
4660   if (!col){
4661     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4662   }
4663   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4664   PetscFunctionReturn(0);
4665 }
4666 
4667 #undef __FUNCT__
4668 #define __FUNCT__ "MatGetBrowsOfAcols"
4669 /*@C
4670     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
4671 
4672     Collective on Mat
4673 
4674    Input Parameters:
4675 +    A,B - the matrices in mpiaij format
4676 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4677 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4678 
4679    Output Parameter:
4680 +    rowb, colb - index sets of rows and columns of B to extract
4681 .    brstart - row index of B_seq at which the B->rmap->n rows taken from B's local rows begin
4682 -    B_seq - the sequential matrix generated
4683 
4684     Level: developer
4685 
4686 @*/
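/*
   A minimal usage sketch (illustrative only; the variable names are made up).  The index sets built on
   the first call are returned through rowb and colb so that a MAT_REUSE_MATRIX call can reuse them:

       IS       rowb = PETSC_NULL,colb = PETSC_NULL;
       PetscInt brstart;
       Mat      B_seq;
       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
       ...
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/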
4687 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4688 {
4689   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4690   PetscErrorCode    ierr;
4691   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4692   IS                isrowb,iscolb;
4693   Mat               *bseq;
4694 
4695   PetscFunctionBegin;
4696   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4697     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4698   }
4699   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4700 
4701   if (scall == MAT_INITIAL_MATRIX){
4702     start = A->cmap->rstart;
4703     cmap  = a->garray;
4704     nzA   = a->A->cmap->n;
4705     nzB   = a->B->cmap->n;
4706     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4707     ncols = 0;
4708     for (i=0; i<nzB; i++) {  /* row < local row index */
4709       if (cmap[i] < start) idx[ncols++] = cmap[i];
4710       else break;
4711     }
4712     imark = i;
4713     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4714     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4715     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4716     *brstart = imark;
4717     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4718   } else {
4719     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4720     isrowb = *rowb; iscolb = *colb;
4721     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4722     bseq[0] = *B_seq;
4723   }
4724   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4725   *B_seq = bseq[0];
4726   ierr = PetscFree(bseq);CHKERRQ(ierr);
4727   if (!rowb){
4728     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4729   } else {
4730     *rowb = isrowb;
4731   }
4732   if (!colb){
4733     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4734   } else {
4735     *colb = iscolb;
4736   }
4737   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4738   PetscFunctionReturn(0);
4739 }
4740 
4741 #undef __FUNCT__
4742 #define __FUNCT__ "MatGetBrowsOfAoCols"
4743 /*@C
4744     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4745     of the OFF-DIAGONAL portion of local A
4746 
4747     Collective on Mat
4748 
4749    Input Parameters:
4750 +    A,B - the matrices in mpiaij format
4751 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4752 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4753 .    startsj_r - similar to startsj for receives
4754 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4755 
4756    Output Parameter:
4757 .    B_oth - the sequential matrix generated
4758 
4759     Level: developer
4760 
4761 @*/
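/*
   A minimal usage sketch (illustrative only; the variable names are made up).  The message layout and
   send buffer built on the first call are handed back through startsj, startsj_r and bufa so that a
   MAT_REUSE_MATRIX call only re-communicates the numerical values:

       PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
       MatScalar *bufa = PETSC_NULL;
       Mat       B_oth;
       ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
       ...
       ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/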
4762 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4763 {
4764   VecScatter_MPI_General *gen_to,*gen_from;
4765   PetscErrorCode         ierr;
4766   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4767   Mat_SeqAIJ             *b_oth;
4768   VecScatter             ctx=a->Mvctx;
4769   MPI_Comm               comm=((PetscObject)ctx)->comm;
4770   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4771   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4772   PetscScalar            *rvalues,*svalues;
4773   MatScalar              *b_otha,*bufa,*bufA;
4774   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4775   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4776   MPI_Status             *sstatus,rstatus;
4777   PetscMPIInt            jj;
4778   PetscInt               *cols,sbs,rbs;
4779   PetscScalar            *vals;
4780 
4781   PetscFunctionBegin;
4782   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4783     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4784   }
4785   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4786   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4787 
4788   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4789   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4790   rvalues  = gen_from->values; /* holds the length of receiving row */
4791   svalues  = gen_to->values;   /* holds the length of sending row */
4792   nrecvs   = gen_from->n;
4793   nsends   = gen_to->n;
4794 
4795   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4796   srow     = gen_to->indices;   /* local row index to be sent */
4797   sstarts  = gen_to->starts;
4798   sprocs   = gen_to->procs;
4799   sstatus  = gen_to->sstatus;
4800   sbs      = gen_to->bs;
4801   rstarts  = gen_from->starts;
4802   rprocs   = gen_from->procs;
4803   rbs      = gen_from->bs;
4804 
4805   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4806   if (scall == MAT_INITIAL_MATRIX){
4807     /* i-array */
4808     /*---------*/
4809     /*  post receives */
4810     for (i=0; i<nrecvs; i++){
4811       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4812       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4813       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4814     }
4815 
4816     /* pack the outgoing message */
4817     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4818     sstartsj[0] = 0;  rstartsj[0] = 0;
4819     len = 0; /* total length of j or a array to be sent */
4820     k = 0;
4821     for (i=0; i<nsends; i++){
4822       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4823       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4824       for (j=0; j<nrows; j++) {
4825         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4826         for (l=0; l<sbs; l++){
4827           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4828           rowlen[j*sbs+l] = ncols;
4829           len += ncols;
4830           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4831         }
4832         k++;
4833       }
4834       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4835       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4836     }
4837     /* recvs and sends of i-array are completed */
4838     i = nrecvs;
4839     while (i--) {
4840       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4841     }
4842     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4843 
4844     /* allocate buffers for sending j and a arrays */
4845     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4846     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4847 
4848     /* create i-array of B_oth */
4849     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4850     b_othi[0] = 0;
4851     len = 0; /* total length of j or a array to be received */
4852     k = 0;
4853     for (i=0; i<nrecvs; i++){
4854       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4855       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4856       for (j=0; j<nrows; j++) {
4857         b_othi[k+1] = b_othi[k] + rowlen[j];
4858         len += rowlen[j]; k++;
4859       }
4860       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4861     }
4862 
4863     /* allocate space for j and a arrays of B_oth */
4864     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4865     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4866 
4867     /* j-array */
4868     /*---------*/
4869     /*  post receives of j-array */
4870     for (i=0; i<nrecvs; i++){
4871       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4872       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4873     }
4874 
4875     /* pack the outgoing message j-array */
4876     k = 0;
4877     for (i=0; i<nsends; i++){
4878       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4879       bufJ = bufj+sstartsj[i];
4880       for (j=0; j<nrows; j++) {
4881         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4882         for (ll=0; ll<sbs; ll++){
4883           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4884           for (l=0; l<ncols; l++){
4885             *bufJ++ = cols[l];
4886           }
4887           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4888         }
4889       }
4890       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4891     }
4892 
4893     /* recvs and sends of j-array are completed */
4894     i = nrecvs;
4895     while (i--) {
4896       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4897     }
4898     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4899   } else if (scall == MAT_REUSE_MATRIX){
4900     sstartsj = *startsj;
4901     rstartsj = *startsj_r;
4902     bufa     = *bufa_ptr;
4903     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4904     b_otha   = b_oth->a;
4905   } else {
4906     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4907   }
4908 
4909   /* a-array */
4910   /*---------*/
4911   /*  post receives of a-array */
4912   for (i=0; i<nrecvs; i++){
4913     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4914     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4915   }
4916 
4917   /* pack the outgoing message a-array */
4918   k = 0;
4919   for (i=0; i<nsends; i++){
4920     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4921     bufA = bufa+sstartsj[i];
4922     for (j=0; j<nrows; j++) {
4923       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4924       for (ll=0; ll<sbs; ll++){
4925         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4926         for (l=0; l<ncols; l++){
4927           *bufA++ = vals[l];
4928         }
4929         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4930       }
4931     }
4932     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4933   }
4934   /* recvs and sends of a-array are completed */
4935   i = nrecvs;
4936   while (i--) {
4937     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4938   }
4939   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4940   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4941 
4942   if (scall == MAT_INITIAL_MATRIX){
4943     /* put together the new matrix */
4944     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4945 
4946     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4947     /* Since these are PETSc arrays, change flags to free them as necessary. */
4948     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4949     b_oth->free_a  = PETSC_TRUE;
4950     b_oth->free_ij = PETSC_TRUE;
4951     b_oth->nonew   = 0;
4952 
4953     ierr = PetscFree(bufj);CHKERRQ(ierr);
4954     if (!startsj || !bufa_ptr){
4955       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4956       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4957     } else {
4958       *startsj   = sstartsj;
4959       *startsj_r = rstartsj;
4960       *bufa_ptr  = bufa;
4961     }
4962   }
4963   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4964   PetscFunctionReturn(0);
4965 }
4966 
4967 #undef __FUNCT__
4968 #define __FUNCT__ "MatGetCommunicationStructs"
4969 /*@C
4970   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4971 
4972   Not Collective
4973 
4974   Input Parameters:
4975 . A - The matrix in mpiaij format
4976 
4977   Output Parameter:
4978 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4979 . colmap - A map from global column index to local index into lvec
4980 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4981 
4982   Level: developer
4983 
4984 @*/
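/*
   A minimal usage sketch (illustrative only).  The returned objects are internal to the matrix and must
   not be destroyed by the caller:

       Vec        lvec;
       PetscInt   *colmap;          (declare PetscTable colmap; instead when PETSC_USE_CTABLE is defined)
       VecScatter Mvctx;
       ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/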
4985 #if defined (PETSC_USE_CTABLE)
4986 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4987 #else
4988 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4989 #endif
4990 {
4991   Mat_MPIAIJ *a;
4992 
4993   PetscFunctionBegin;
4994   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
4995   PetscValidPointer(lvec, 2);
4996   PetscValidPointer(colmap, 3);
4997   PetscValidPointer(multScatter, 4);
4998   a = (Mat_MPIAIJ *) A->data;
4999   if (lvec) *lvec = a->lvec;
5000   if (colmap) *colmap = a->colmap;
5001   if (multScatter) *multScatter = a->Mvctx;
5002   PetscFunctionReturn(0);
5003 }
5004 
5005 EXTERN_C_BEGIN
5006 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5007 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5008 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5009 EXTERN_C_END
5010 
5011 #undef __FUNCT__
5012 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5013 /*
5014     Computes (B'*A')', which equals A*B, since computing the MPIDense*MPIAIJ product A*B directly is untenable
5015 
5016                n                       p                          p
5017         (              )       (              )         (                  )
5018       m (      A       )  *  n (       B      )   =   m (         C        )
5019         (              )       (              )         (                  )
5020 
5021 */
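/*
   Equivalently, with A of size m x n (dense) and B of size n x p (AIJ), the routine below forms the
   product through the identity C = A*B = (B^T * A^T)^T: transpose both operands, multiply, and
   transpose the result back into C.
*/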
5022 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5023 {
5024   PetscErrorCode     ierr;
5025   Mat                At,Bt,Ct;
5026 
5027   PetscFunctionBegin;
5028   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5029   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5030   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5031   ierr = MatDestroy(At);CHKERRQ(ierr);
5032   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5033   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5034   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5035   PetscFunctionReturn(0);
5036 }
5037 
5038 #undef __FUNCT__
5039 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5040 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5041 {
5042   PetscErrorCode ierr;
5043   PetscInt       m=A->rmap->n,n=B->cmap->n;
5044   Mat            Cmat;
5045 
5046   PetscFunctionBegin;
5047   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5048   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5049   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5050   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5051   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5052   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5053   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5054   *C   = Cmat;
5055   PetscFunctionReturn(0);
5056 }
5057 
5058 /* ----------------------------------------------------------------*/
5059 #undef __FUNCT__
5060 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5061 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5062 {
5063   PetscErrorCode ierr;
5064 
5065   PetscFunctionBegin;
5066   if (scall == MAT_INITIAL_MATRIX){
5067     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5068   }
5069   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5070   PetscFunctionReturn(0);
5071 }
5072 
5073 EXTERN_C_BEGIN
5074 #if defined(PETSC_HAVE_MUMPS)
5075 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5076 #endif
5077 #if defined(PETSC_HAVE_PASTIX)
5078 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5079 #endif
5080 #if defined(PETSC_HAVE_SUPERLU_DIST)
5081 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5082 #endif
5083 #if defined(PETSC_HAVE_SPOOLES)
5084 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5085 #endif
5086 EXTERN_C_END
5087 
5088 /*MC
5089    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5090 
5091    Options Database Keys:
5092 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5093 
5094   Level: beginner
5095 
5096 .seealso: MatCreateMPIAIJ()
5097 M*/
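/*
   A minimal sketch of selecting this type at run time (illustrative only, not part of this file); run
   with the option -mat_type mpiaij so that MatSetFromOptions() picks it up:

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
       ierr = MatSetFromOptions(A);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/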
5098 
5099 EXTERN_C_BEGIN
5100 #undef __FUNCT__
5101 #define __FUNCT__ "MatCreate_MPIAIJ"
5102 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5103 {
5104   Mat_MPIAIJ     *b;
5105   PetscErrorCode ierr;
5106   PetscMPIInt    size;
5107 
5108   PetscFunctionBegin;
5109   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5110 
5111   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5112   B->data         = (void*)b;
5113   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5114   B->rmap->bs     = 1;
5115   B->assembled    = PETSC_FALSE;
5116   B->mapping      = 0;
5117 
5118   B->insertmode   = NOT_SET_VALUES;
5119   b->size         = size;
5120   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5121 
5122   /* build cache for off array entries formed */
5123   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5124   b->donotstash  = PETSC_FALSE;
5125   b->colmap      = 0;
5126   b->garray      = 0;
5127   b->roworiented = PETSC_TRUE;
5128 
5129   /* stuff used for matrix vector multiply */
5130   b->lvec      = PETSC_NULL;
5131   b->Mvctx     = PETSC_NULL;
5132 
5133   /* stuff for MatGetRow() */
5134   b->rowindices   = 0;
5135   b->rowvalues    = 0;
5136   b->getrowactive = PETSC_FALSE;
5137 
5138 #if defined(PETSC_HAVE_SPOOLES)
5139   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5140                                      "MatGetFactor_mpiaij_spooles",
5141                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5142 #endif
5143 #if defined(PETSC_HAVE_MUMPS)
5144   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5145                                      "MatGetFactor_aij_mumps",
5146                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5147 #endif
5148 #if defined(PETSC_HAVE_PASTIX)
5149   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5150 					   "MatGetFactor_mpiaij_pastix",
5151 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5152 #endif
5153 #if defined(PETSC_HAVE_SUPERLU_DIST)
5154   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5155                                      "MatGetFactor_mpiaij_superlu_dist",
5156                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5157 #endif
5158   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5159                                      "MatStoreValues_MPIAIJ",
5160                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5161   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5162                                      "MatRetrieveValues_MPIAIJ",
5163                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5164   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5165 				     "MatGetDiagonalBlock_MPIAIJ",
5166                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5167   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5168 				     "MatIsTranspose_MPIAIJ",
5169 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5170   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5171 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5172 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5173   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5174 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5175 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5176   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5177 				     "MatDiagonalScaleLocal_MPIAIJ",
5178 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5179   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5180                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5181                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5182   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5183                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5184                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5185   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5186                                      "MatConvert_MPIAIJ_MPISBAIJ",
5187                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5188   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5189                                      "MatMatMult_MPIDense_MPIAIJ",
5190                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5191   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5192                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5193                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5194   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5195                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5196                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5197   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5198   PetscFunctionReturn(0);
5199 }
5200 EXTERN_C_END
5201 
5202 #undef __FUNCT__
5203 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5204 /*@
5205      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5206          and "off-diagonal" part of the matrix in CSR format.
5207 
5208    Collective on MPI_Comm
5209 
5210    Input Parameters:
5211 +  comm - MPI communicator
5212 .  m - number of local rows (Cannot be PETSC_DECIDE)
5213 .  n - This value should be the same as the local size used in creating the
5214        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5215        calculated if N is given). For square matrices n is almost always m.
5216 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5217 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5218 .   i - row indices for "diagonal" portion of matrix
5219 .   j - column indices
5220 .   a - matrix values
5221 .   oi - row indices for "off-diagonal" portion of matrix
5222 .   oj - column indices
5223 -   oa - matrix values
5224 
5225    Output Parameter:
5226 .   mat - the matrix
5227 
5228    Level: advanced
5229 
5230    Notes:
5231        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5232 
5233        The i and j indices are 0 based
5234 
5235        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5236 
5237        This sets local rows and cannot be used to set off-processor values.
5238 
5239        You cannot later use MatSetValues() to change values in this matrix.
5240 
5241 .keywords: matrix, aij, compressed row, sparse, parallel
5242 
5243 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5244           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5245 @*/
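/*
   A minimal usage sketch, assuming exactly two MPI processes (illustrative only, not part of this file;
   the data describe the 2 x 2 matrix [2 -1; -1 2] with one row per process).  The "diagonal" arrays
   i,j,a use local column indices of the square diagonal block, while the "off-diagonal" arrays oi,oj,oa
   use global column indices, since the off-diagonal block below is created with the full global column
   width:

       PetscMPIInt rank;
       PetscInt    i[2]  = {0,1},j[1],oi[2] = {0,1},oj[1];
       PetscScalar a[1]  = {2.0},oa[1] = {-1.0};
       Mat         A;
       ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
       j[0]  = 0;
       oj[0] = rank ? 0 : 1;
       ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/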
5246 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5247 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5248 {
5249   PetscErrorCode ierr;
5250   Mat_MPIAIJ     *maij;
5251 
5252   PetscFunctionBegin;
5253   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5254   if (i[0]) {
5255     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5256   }
5257   if (oi[0]) {
5258     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5259   }
5260   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5261   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5262   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5263   maij = (Mat_MPIAIJ*) (*mat)->data;
5264   maij->donotstash     = PETSC_TRUE;
5265   (*mat)->preallocated = PETSC_TRUE;
5266 
5267   ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
5268   ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
5269   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5270   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5271 
5272   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5273   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5274 
5275   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5276   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5277   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5278   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5279 
5280   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5281   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5282   PetscFunctionReturn(0);
5283 }
5284 
5285 /*
5286     Special version of MatSetValues() for MPIAIJ matrices, intended to be called directly from Fortran (avoiding the overhead of the generated Fortran interface stubs)
5287 */
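/*
   Map the C symbol below to the name generated by the Fortran compiler: all upper case when
   PETSC_HAVE_FORTRAN_CAPS is defined, no trailing underscore when PETSC_HAVE_FORTRAN_UNDERSCORE
   is not defined, otherwise the lower case name with a trailing underscore is used as written.
*/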
5288 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5289 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5290 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5291 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5292 #endif
5293 
5294 /* Change these macros so they can be used in a void function */
5295 #undef CHKERRQ
5296 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5297 #undef SETERRQ2
5298 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5299 #undef SETERRQ
5300 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
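/* With these definitions an error detected below aborts (via CHKERRABORT) instead of returning,
   since a void function has no PetscErrorCode return value to propagate */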
5301 
5302 EXTERN_C_BEGIN
5303 #undef __FUNCT__
5304 #define __FUNCT__ "matsetvaluesmpiaij_"
5305 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5306 {
5307   Mat             mat = *mmat;
5308   PetscInt        m = *mm, n = *mn;
5309   InsertMode      addv = *maddv;
5310   Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
5311   PetscScalar     value;
5312   PetscErrorCode  ierr;
5313 
5314   ierr = MatPreallocated(mat);CHKERRQ(ierr);
5315   if (mat->insertmode == NOT_SET_VALUES) {
5316     mat->insertmode = addv;
5317   }
5318 #if defined(PETSC_USE_DEBUG)
5319   else if (mat->insertmode != addv) {
5320     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5321   }
5322 #endif
5323   {
5324   PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
5325   PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5326   PetscBool       roworiented = aij->roworiented;
5327 
5328   /* Some Variables required in the macro */
5329   Mat             A = aij->A;
5330   Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
5331   PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5332   MatScalar       *aa = a->a;
5333   PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
5334   Mat             B = aij->B;
5335   Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
5336   PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5337   MatScalar       *ba = b->a;
5338 
5339   PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5340   PetscInt        nonew = a->nonew;
5341   MatScalar       *ap1,*ap2;
5342 
5343   PetscFunctionBegin;
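  /* Locally owned rows are inserted directly into the "diagonal" (A) or "off-diagonal" (B) SeqAIJ block
     through the MatSetValues_SeqAIJ_A_Private()/MatSetValues_SeqAIJ_B_Private() macros; rows owned by
     other processes are stashed (unless donotstash is set) and communicated at assembly time. */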
5344   for (i=0; i<m; i++) {
5345     if (im[i] < 0) continue;
5346 #if defined(PETSC_USE_DEBUG)
5347     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5348 #endif
5349     if (im[i] >= rstart && im[i] < rend) {
5350       row      = im[i] - rstart;
5351       lastcol1 = -1;
5352       rp1      = aj + ai[row];
5353       ap1      = aa + ai[row];
5354       rmax1    = aimax[row];
5355       nrow1    = ailen[row];
5356       low1     = 0;
5357       high1    = nrow1;
5358       lastcol2 = -1;
5359       rp2      = bj + bi[row];
5360       ap2      = ba + bi[row];
5361       rmax2    = bimax[row];
5362       nrow2    = bilen[row];
5363       low2     = 0;
5364       high2    = nrow2;
5365 
5366       for (j=0; j<n; j++) {
5367         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
5368         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5369         if (in[j] >= cstart && in[j] < cend){
5370           col = in[j] - cstart;
5371           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5372         } else if (in[j] < 0) continue;
5373 #if defined(PETSC_USE_DEBUG)
5374         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5375 #endif
5376         else {
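          /* Column lies outside the diagonal block: for an already assembled matrix translate the global
             column index through the colmap (created on first use); if the entry would introduce a new
             off-diagonal column, DisAssemble_MPIAIJ() reverts B to global column indexing. */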
5377           if (mat->was_assembled) {
5378             if (!aij->colmap) {
5379               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5380             }
5381 #if defined (PETSC_USE_CTABLE)
5382             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5383             col--;
5384 #else
5385             col = aij->colmap[in[j]] - 1;
5386 #endif
5387             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5388               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5389               col =  in[j];
5390               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5391               B = aij->B;
5392               b = (Mat_SeqAIJ*)B->data;
5393               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5394               rp2      = bj + bi[row];
5395               ap2      = ba + bi[row];
5396               rmax2    = bimax[row];
5397               nrow2    = bilen[row];
5398               low2     = 0;
5399               high2    = nrow2;
5400               bm       = aij->B->rmap->n;
5401               ba = b->a;
5402             }
5403           } else col = in[j];
5404           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5405         }
5406       }
5407     } else {
5408       if (!aij->donotstash) {
5409         if (roworiented) {
5410           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5411         } else {
5412           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5413         }
5414       }
5415     }
5416   }}
5417   PetscFunctionReturnVoid();
5418 }
5419 EXTERN_C_END
5420 
5421