xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a79edbef9e693da3f78450db6f40863c07a6856b)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "petscblaslapack.h"
5 
6 #undef __FUNCT__
7 #define __FUNCT__ "MatDistribute_MPIAIJ"
8 /*
9     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
10     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
11 
12     Only for square matrices
13 */
14 PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
15 {
16   PetscMPIInt    rank,size;
17   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld;
18   PetscErrorCode ierr;
19   Mat            mat;
20   Mat_SeqAIJ     *gmata;
21   PetscMPIInt    tag;
22   MPI_Status     status;
23   PetscBool      aij;
24   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
25 
26   PetscFunctionBegin;
27   CHKMEMQ;
28   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
29   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
30   if (!rank) {
31     ierr = PetscTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
32     if (!aij) SETERRQ1(((PetscObject)gmat)->comm,PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
33   }
34   if (reuse == MAT_INITIAL_MATRIX) {
35     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
36     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
37     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
38     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
39     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
40     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
41     rowners[0] = 0;
42     for (i=2; i<=size; i++) {
43       rowners[i] += rowners[i-1];
44     }
45     rstart = rowners[rank];
46     rend   = rowners[rank+1];
47     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
48     if (!rank) {
49       gmata = (Mat_SeqAIJ*) gmat->data;
50       /* send row lengths to all processors */
51       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
52       for (i=1; i<size; i++) {
53 	ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
54       }
55       /* determine the number of diagonal and off-diagonal nonzeros in each row */
56       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
57       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
58       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
59       jj = 0;
60       for (i=0; i<m; i++) {
61 	for (j=0; j<dlens[i]; j++) {
62           if (gmata->j[jj] < rstart) ld[i]++;
63 	  if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
64 	  jj++;
65 	}
66       }
67       /* send column indices to other processes */
68       for (i=1; i<size; i++) {
69 	nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
70 	ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
71 	ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
72       }
73 
74       /* send numerical values to other processes */
75       for (i=1; i<size; i++) {
76         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
77         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
78       }
79       gmataa = gmata->a;
80       gmataj = gmata->j;
81 
82     } else {
83       /* receive row lengths */
84       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
85       /* receive column indices */
86       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
87       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
88       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
89       /* determine the number of diagonal and off-diagonal nonzeros in each row */
90       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
91       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
92       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
93       jj = 0;
94       for (i=0; i<m; i++) {
95 	for (j=0; j<dlens[i]; j++) {
96           if (gmataj[jj] < rstart) ld[i]++;
97 	  if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
98 	  jj++;
99 	}
100       }
101       /* receive numerical values */
102       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
103       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
104     }
105     /* set preallocation */
106     for (i=0; i<m; i++) {
107       dlens[i] -= olens[i];
108     }
109     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
110     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
111 
112     for (i=0; i<m; i++) {
113       dlens[i] += olens[i];
114     }
115     cnt  = 0;
116     for (i=0; i<m; i++) {
117       row  = rstart + i;
118       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
119       cnt += dlens[i];
120     }
121     if (rank) {
122       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
123     }
124     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
125     ierr = PetscFree(rowners);CHKERRQ(ierr);
126     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
127     *inmat = mat;
128   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
129     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
130     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
131     mat   = *inmat;
132     ierr  = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
133     if (!rank) {
134       /* send numerical values to other processes */
135       gmata = (Mat_SeqAIJ*) gmat->data;
136       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
137       gmataa = gmata->a;
138       for (i=1; i<size; i++) {
139         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
140         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
141       }
142       nz   = gmata->i[rowners[1]]-gmata->i[rowners[0]];
143     } else {
144       /* receive numerical values from process 0 */
145       nz   = Ad->nz + Ao->nz;
146       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
147       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
148     }
149     /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
150     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
151     ad = Ad->a;
152     ao = Ao->a;
153     if (mat->rmap->n) {
154       i  = 0;
155       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
156       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
157     }
158     for (i=1; i<mat->rmap->n; i++) {
159       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
160       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
161     }
162     i--;
163     if (mat->rmap->n) {
164       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
165     }
166     if (rank) {
167       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
168     }
169   }
170   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
171   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
172   CHKMEMQ;
173   PetscFunctionReturn(0);
174 }
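
/*
   Example usage of MatDistribute_MPIAIJ() (a minimal sketch, not taken from the
   PETSc documentation; gseq, mlocal and M are hypothetical names). Only rank 0
   needs to hold the global SeqAIJ matrix; every rank passes its local row count:

      Mat      gseq,dist;
      PetscInt mlocal = PETSC_DECIDE,M;

      (set M to the global number of rows of gseq on every rank)
      ierr = PetscSplitOwnership(PETSC_COMM_WORLD,&mlocal,&M);CHKERRQ(ierr);
      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);

   and later, after the numerical values of gseq change (same nonzero pattern):

      ierr = MatDistribute_MPIAIJ(PETSC_COMM_WORLD,gseq,mlocal,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/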
175 
176 /*
177   Local utility routine that creates a mapping from the global column
178 number to the local number in the off-diagonal part of the local
179 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
180 a slightly higher hash table cost; without it, it is not scalable (each processor
181 has an order-N integer array) but access is fast.
182 */
183 #undef __FUNCT__
184 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
185 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
186 {
187   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
188   PetscErrorCode ierr;
189   PetscInt       n = aij->B->cmap->n,i;
190 
191   PetscFunctionBegin;
192 #if defined (PETSC_USE_CTABLE)
193   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
194   for (i=0; i<n; i++){
195     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
196   }
197 #else
198   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
199   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
200   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
201   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
202 #endif
203   PetscFunctionReturn(0);
204 }
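
/*
   Looking the mapping up later (this is the pattern used by MatSetValues_MPIAIJ()
   and MatGetValues_MPIAIJ() below; gcol is a hypothetical global column index and
   lcol the resulting local index):

#if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
#else
      lcol = aij->colmap[gcol] - 1;
#endif

   The map stores lcol+1 rather than lcol so that 0 (a table miss or a zeroed
   array slot) can mean "this global column is not present in the off-diagonal
   part"; after the decrement such misses appear as lcol == -1.
*/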
205 
206 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
207 { \
208     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
209     lastcol1 = col;\
210     while (high1-low1 > 5) { \
211       t = (low1+high1)/2; \
212       if (rp1[t] > col) high1 = t; \
213       else             low1  = t; \
214     } \
215       for (_i=low1; _i<high1; _i++) { \
216         if (rp1[_i] > col) break; \
217         if (rp1[_i] == col) { \
218           if (addv == ADD_VALUES) ap1[_i] += value;   \
219           else                    ap1[_i] = value; \
220           goto a_noinsert; \
221         } \
222       }  \
223       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
224       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
225       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
226       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
227       N = nrow1++ - 1; a->nz++; high1++; \
228       /* shift up all the later entries in this row */ \
229       for (ii=N; ii>=_i; ii--) { \
230         rp1[ii+1] = rp1[ii]; \
231         ap1[ii+1] = ap1[ii]; \
232       } \
233       rp1[_i] = col;  \
234       ap1[_i] = value;  \
235       a_noinsert: ; \
236       ailen[row] = nrow1; \
237 }
238 
239 
240 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
241 { \
242     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
243     lastcol2 = col;\
244     while (high2-low2 > 5) { \
245       t = (low2+high2)/2; \
246       if (rp2[t] > col) high2 = t; \
247       else             low2  = t; \
248     } \
249     for (_i=low2; _i<high2; _i++) {		\
250       if (rp2[_i] > col) break;			\
251       if (rp2[_i] == col) {			      \
252 	if (addv == ADD_VALUES) ap2[_i] += value;     \
253 	else                    ap2[_i] = value;      \
254 	goto b_noinsert;			      \
255       }						      \
256     }							      \
257     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
258     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
259     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
260     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
261     N = nrow2++ - 1; b->nz++; high2++;					\
262     /* shift up all the later entries in this row */			\
263     for (ii=N; ii>=_i; ii--) {						\
264       rp2[ii+1] = rp2[ii];						\
265       ap2[ii+1] = ap2[ii];						\
266     }									\
267     rp2[_i] = col;							\
268     ap2[_i] = value;							\
269     b_noinsert: ;								\
270     bilen[row] = nrow2;							\
271 }
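
/*
   Both macros above implement the same sorted-row insertion: a binary search
   narrows the window while it holds more than 5 entries, a linear scan finishes
   the search, and when a genuinely new nonzero must be stored the tail of the
   row is shifted up one slot. A standalone sketch of that pattern follows
   (RowInsertOrAdd is a hypothetical helper, not part of PETSc):

      static void RowInsertOrAdd(PetscInt *cols,MatScalar *vals,PetscInt *nrow,PetscInt col,MatScalar v)
      {
        PetscInt lo = 0,hi = *nrow,i,k;
        while (hi-lo > 5) {
          PetscInt t = (lo+hi)/2;
          if (cols[t] > col) hi = t; else lo = t;
        }
        for (i=lo; i<hi; i++) if (cols[i] >= col) break;
        if (i < *nrow && cols[i] == col) { vals[i] += v; return; }
        for (k=(*nrow)++; k>i; k--) { cols[k] = cols[k-1]; vals[k] = vals[k-1]; }
        cols[i] = col; vals[i] = v;
      }

   assuming, unlike the macros, that the arrays already have room for one more
   entry (the macros call MatSeqXAIJReallocateAIJ to grow the row when needed).
*/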
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
275 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
276 {
277   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
278   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
279   PetscErrorCode ierr;
280   PetscInt       l,*garray = mat->garray,diag;
281 
282   PetscFunctionBegin;
283   /* code only works for square matrices A */
284 
285   /* find size of row to the left of the diagonal part */
286   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
287   row  = row - diag;
288   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
289     if (garray[b->j[b->i[row]+l]] > diag) break;
290   }
291   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
292 
293   /* diagonal part */
294   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
295 
296   /* right of diagonal part */
297   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
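
/*
   The values v given to MatSetValuesRow_MPIAIJ() must follow the global storage
   order of the row, which is why the three PetscMemcpy() calls above split v as

      v = [ B entries with global col < diag | A (diagonal block) entries | B entries with global col >= diag ]

   with l counting the leading off-diagonal entries left of the diagonal block.
*/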
300 
301 #undef __FUNCT__
302 #define __FUNCT__ "MatSetValues_MPIAIJ"
303 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
304 {
305   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
306   PetscScalar    value;
307   PetscErrorCode ierr;
308   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
309   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
310   PetscBool      roworiented = aij->roworiented;
311 
312   /* Some Variables required in the macro */
313   Mat            A = aij->A;
314   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
315   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
316   MatScalar      *aa = a->a;
317   PetscBool      ignorezeroentries = a->ignorezeroentries;
318   Mat            B = aij->B;
319   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
320   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
321   MatScalar      *ba = b->a;
322 
323   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
324   PetscInt       nonew = a->nonew;
325   MatScalar      *ap1,*ap2;
326 
327   PetscFunctionBegin;
328   if (v) PetscValidScalarPointer(v,6);
329   for (i=0; i<m; i++) {
330     if (im[i] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
333 #endif
334     if (im[i] >= rstart && im[i] < rend) {
335       row      = im[i] - rstart;
336       lastcol1 = -1;
337       rp1      = aj + ai[row];
338       ap1      = aa + ai[row];
339       rmax1    = aimax[row];
340       nrow1    = ailen[row];
341       low1     = 0;
342       high1    = nrow1;
343       lastcol2 = -1;
344       rp2      = bj + bi[row];
345       ap2      = ba + bi[row];
346       rmax2    = bimax[row];
347       nrow2    = bilen[row];
348       low2     = 0;
349       high2    = nrow2;
350 
351       for (j=0; j<n; j++) {
352         if (v) {if (roworiented) value = v[i*n+j]; else value = v[i+j*m];} else value = 0.0;
353         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
354         if (in[j] >= cstart && in[j] < cend){
355           col = in[j] - cstart;
356           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
357         } else if (in[j] < 0) continue;
358 #if defined(PETSC_USE_DEBUG)
359         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
360 #endif
361         else {
362           if (mat->was_assembled) {
363             if (!aij->colmap) {
364               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
365             }
366 #if defined (PETSC_USE_CTABLE)
367             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
368 	    col--;
369 #else
370             col = aij->colmap[in[j]] - 1;
371 #endif
372             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
373               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
374               col =  in[j];
375               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
376               B = aij->B;
377               b = (Mat_SeqAIJ*)B->data;
378               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
379               rp2      = bj + bi[row];
380               ap2      = ba + bi[row];
381               rmax2    = bimax[row];
382               nrow2    = bilen[row];
383               low2     = 0;
384               high2    = nrow2;
385               bm       = aij->B->rmap->n;
386               ba = b->a;
387             }
388           } else col = in[j];
389           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
390         }
391       }
392     } else {
393       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
394       if (!aij->donotstash) {
395         if (roworiented) {
396           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
397         } else {
398           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
399         }
400       }
401     }
402   }
403   PetscFunctionReturn(0);
404 }
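
/*
   Caller-side sketch (hypothetical values; any global row index may be used,
   off-process entries are routed through the stash and delivered at assembly):

      PetscInt    row = 0,cols[2] = {0,5};
      PetscScalar vals[2] = {1.0,-1.0};

      ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/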
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetValues_MPIAIJ"
408 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
409 {
410   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
413   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
414 
415   PetscFunctionBegin;
416   for (i=0; i<m; i++) {
417     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
418     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
419     if (idxm[i] >= rstart && idxm[i] < rend) {
420       row = idxm[i] - rstart;
421       for (j=0; j<n; j++) {
422         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
423         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
424         if (idxn[j] >= cstart && idxn[j] < cend){
425           col = idxn[j] - cstart;
426           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
427         } else {
428           if (!aij->colmap) {
429             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
430           }
431 #if defined (PETSC_USE_CTABLE)
432           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
433           col --;
434 #else
435           col = aij->colmap[idxn[j]] - 1;
436 #endif
437           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
438           else {
439             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
440           }
441         }
442       }
443     } else {
444       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
445     }
446   }
447   PetscFunctionReturn(0);
448 }
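
/*
   Note that only locally owned rows may be queried, e.g. (a sketch):

      PetscInt    row,col = 3;
      PetscScalar val;

      ierr = MatGetOwnershipRange(mat,&row,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/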
449 
450 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
454 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
455 {
456   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
457   PetscErrorCode ierr;
458   PetscInt       nstash,reallocs;
459   InsertMode     addv;
460 
461   PetscFunctionBegin;
462   if (aij->donotstash || mat->nooffprocentries) {
463     PetscFunctionReturn(0);
464   }
465 
466   /* make sure all processors are using the same insert mode, either INSERT_VALUES or ADD_VALUES */
467   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
468   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
469   mat->insertmode = addv; /* in case this processor had no cache */
470 
471   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
472   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
473   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
474   PetscFunctionReturn(0);
475 }
476 
477 #undef __FUNCT__
478 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
479 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
480 {
481   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
482   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
483   PetscErrorCode ierr;
484   PetscMPIInt    n;
485   PetscInt       i,j,rstart,ncols,flg;
486   PetscInt       *row,*col;
487   PetscBool      other_disassembled;
488   PetscScalar    *val;
489   InsertMode     addv = mat->insertmode;
490 
491   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
492   PetscFunctionBegin;
493   if (!aij->donotstash && !mat->nooffprocentries) {
494     while (1) {
495       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
496       if (!flg) break;
497 
498       for (i=0; i<n;) {
499         /* Now identify the consecutive vals belonging to the same row */
500         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
501         if (j < n) ncols = j-i;
502         else       ncols = n-i;
503         /* Now assemble all these values with a single function call */
504         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
505         i = j;
506       }
507     }
508     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
509   }
510   a->compressedrow.use     = PETSC_FALSE;
511   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
512   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
513 
514   /* determine if any processor has disassembled, if so we must
515      also disassemble ourselves, in order that we may reassemble. */
516   /*
517      if nonzero structure of submatrix B cannot change then we know that
518      no processor disassembled thus we can skip this stuff
519   */
520   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
521     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
522     if (mat->was_assembled && !other_disassembled) {
523       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
524     }
525   }
526   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
527     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
528   }
529   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
530   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
531   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
532   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
533 
534   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
535   aij->rowvalues = 0;
536 
537   /* used by MatAXPY() */
538   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
539   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
540 
541   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);aij->diag = 0;}
542   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
548 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
549 {
550   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
555   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatZeroRows_MPIAIJ"
561 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
562 {
563   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
564   PetscErrorCode    ierr;
565   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
566   PetscInt          i,*owners = A->rmap->range;
567   PetscInt          *nprocs,j,idx,nsends,row;
568   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
569   PetscInt          *rvalues,count,base,slen,*source;
570   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
571   MPI_Comm          comm = ((PetscObject)A)->comm;
572   MPI_Request       *send_waits,*recv_waits;
573   MPI_Status        recv_status,*send_status;
574   const PetscScalar *xx;
575   PetscScalar       *bb;
576 #if defined(PETSC_DEBUG)
577   PetscBool      found = PETSC_FALSE;
578 #endif
579 
580   PetscFunctionBegin;
581   /*  first count number of contributors to each processor */
582   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
583   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
584   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
585   j = 0;
586   for (i=0; i<N; i++) {
587     if (lastidx > (idx = rows[i])) j = 0;
588     lastidx = idx;
589     for (; j<size; j++) {
590       if (idx >= owners[j] && idx < owners[j+1]) {
591         nprocs[2*j]++;
592         nprocs[2*j+1] = 1;
593         owner[i] = j;
594 #if defined(PETSC_DEBUG)
595         found = PETSC_TRUE;
596 #endif
597         break;
598       }
599     }
600 #if defined(PETSC_DEBUG)
601     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
602     found = PETSC_FALSE;
603 #endif
604   }
605   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
606 
607   if (A->nooffproczerorows) {
608     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
609     nrecvs = nsends;
610     nmax   = N;
611   } else {
612     /* inform other processors of the number of messages and the max length */
613     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
614   }
615 
616   /* post receives:   */
617   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
618   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
619   for (i=0; i<nrecvs; i++) {
620     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
621   }
622 
623   /* do sends:
624       1) starts[i] gives the starting index in svalues for stuff going to
625          the ith processor
626   */
627   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
628   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
629   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
630   starts[0] = 0;
631   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
632   for (i=0; i<N; i++) {
633     svalues[starts[owner[i]]++] = rows[i];
634   }
635 
636   starts[0] = 0;
637   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
638   count = 0;
639   for (i=0; i<size; i++) {
640     if (nprocs[2*i+1]) {
641       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
642     }
643   }
644   ierr = PetscFree(starts);CHKERRQ(ierr);
645 
646   base = owners[rank];
647 
648   /*  wait on receives */
649   ierr   = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
650   count  = nrecvs; slen = 0;
651   while (count) {
652     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
653     /* unpack receives into our local space */
654     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
655     source[imdex]  = recv_status.MPI_SOURCE;
656     lens[imdex]    = n;
657     slen          += n;
658     count--;
659   }
660   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
661 
662   /* move the data into the send scatter */
663   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
664   count = 0;
665   for (i=0; i<nrecvs; i++) {
666     values = rvalues + i*nmax;
667     for (j=0; j<lens[i]; j++) {
668       lrows[count++] = values[j] - base;
669     }
670   }
671   ierr = PetscFree(rvalues);CHKERRQ(ierr);
672   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
673   ierr = PetscFree(owner);CHKERRQ(ierr);
674   ierr = PetscFree(nprocs);CHKERRQ(ierr);
675 
676   /* fix right hand side if needed */
677   if (x && b) {
678     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
679     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
680     for (i=0; i<N; i++) {
681       bb[lrows[i]] = diag*xx[lrows[i]];
682     }
683     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
684     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
685   }
686   /*
687         Zero the required rows. If the "diagonal block" of the matrix
688      is square and the user wishes to set the diagonal we use separate
689      code so that MatSetValues() is not called for each diagonal entry,
690      which would allocate new memory and incur many mallocs, slowing things down.
691 
692   */
693   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
694   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
695   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
696     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
697   } else if (diag != 0.0) {
698     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
699     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
700       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
701 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
702     }
703     for (i = 0; i < slen; i++) {
704       row  = lrows[i] + rstart;
705       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
706     }
707     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709   } else {
710     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
711   }
712   ierr = PetscFree(lrows);CHKERRQ(ierr);
713 
714   /* wait on sends */
715   if (nsends) {
716     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
717     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
718     ierr = PetscFree(send_status);CHKERRQ(ierr);
719   }
720   ierr = PetscFree(send_waits);CHKERRQ(ierr);
721   ierr = PetscFree(svalues);CHKERRQ(ierr);
722 
723   PetscFunctionReturn(0);
724 }
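
/*
   Typical use (a sketch): zero two global rows, put 1.0 on their diagonal
   entries, and adjust the right-hand side so that b[row] = 1.0*x[row] in
   each zeroed row:

      PetscInt rows[2] = {0,7};
      ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);

   Passing PETSC_NULL for x and b skips the right-hand-side fix above.
*/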
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatMult_MPIAIJ"
728 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
729 {
730   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
731   PetscErrorCode ierr;
732   PetscInt       nt;
733 
734   PetscFunctionBegin;
735   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
736   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
737   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
738   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
739   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
740   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
741   PetscFunctionReturn(0);
742 }
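
/*
   The multiply above overlaps communication with computation: while the ghost
   values of xx are in flight (between VecScatterBegin() and VecScatterEnd()),
   the purely local product is formed, so overall

      yy = A_diag * xx + B_offdiag * lvec

   where lvec holds the scattered-in ghost entries of xx.
*/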
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
746 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
747 {
748   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
753   PetscFunctionReturn(0);
754 }
755 
756 #undef __FUNCT__
757 #define __FUNCT__ "MatMultAdd_MPIAIJ"
758 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
759 {
760   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
761   PetscErrorCode ierr;
762 
763   PetscFunctionBegin;
764   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
765   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
766   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
767   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
768   PetscFunctionReturn(0);
769 }
770 
771 #undef __FUNCT__
772 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
773 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
774 {
775   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
776   PetscErrorCode ierr;
777   PetscBool      merged;
778 
779   PetscFunctionBegin;
780   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
781   /* do nondiagonal part */
782   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
783   if (!merged) {
784     /* send it on its way */
785     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
786     /* do local part */
787     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
788     /* receive remote parts: note this assumes the values are not actually */
789     /* added into yy until VecScatterEnd() below */
790     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
791   } else {
792     /* do local part */
793     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
794     /* send it on its way */
795     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
796     /* the values were actually received in the Begin() but we still need to call this no-op End() */
797     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
798   }
799   PetscFunctionReturn(0);
800 }
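
/*
   The transpose product reverses the scatter: lvec = B_offdiag^T * xx is
   computed locally and then scattered back with ADD_VALUES, giving

      yy = A_diag^T * xx + (reverse scatter-add of B_offdiag^T * xx)

   When the scatter is "merged" the communication cannot be overlapped with the
   local product, hence the two orderings above.
*/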
801 
802 EXTERN_C_BEGIN
803 #undef __FUNCT__
804 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
805 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
806 {
807   MPI_Comm       comm;
808   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
809   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
810   IS             Me,Notme;
811   PetscErrorCode ierr;
812   PetscInt       M,N,first,last,*notme,i;
813   PetscMPIInt    size;
814 
815   PetscFunctionBegin;
816 
817   /* Easy test: the diagonal blocks must be transposes of each other */
818   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
819   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
820   if (!*f) PetscFunctionReturn(0);
821   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
822   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
823   if (size == 1) PetscFunctionReturn(0);
824 
825   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
826   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
827   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
828   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
829   for (i=0; i<first; i++) notme[i] = i;
830   for (i=last; i<M; i++) notme[i-last+first] = i;
831   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
832   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
833   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
834   Aoff = Aoffs[0];
835   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
836   Boff = Boffs[0];
837   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
838   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
839   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
840   ierr = ISDestroy(Me);CHKERRQ(ierr);
841   ierr = ISDestroy(Notme);CHKERRQ(ierr);
842 
843   PetscFunctionReturn(0);
844 }
845 EXTERN_C_END
846 
847 #undef __FUNCT__
848 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
849 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
850 {
851   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
852   PetscErrorCode ierr;
853 
854   PetscFunctionBegin;
855   /* do nondiagonal part */
856   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
857   /* send it on its way */
858   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
859   /* do local part */
860   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
861   /* receive remote parts */
862   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 /*
867   This only works correctly for square matrices where the subblock A->A is the
868    diagonal block
869 */
870 #undef __FUNCT__
871 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
872 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
873 {
874   PetscErrorCode ierr;
875   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
876 
877   PetscFunctionBegin;
878   if (A->rmap->N != A->cmap->N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
879   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
880   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
881   PetscFunctionReturn(0);
882 }
883 
884 #undef __FUNCT__
885 #define __FUNCT__ "MatScale_MPIAIJ"
886 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
887 {
888   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
889   PetscErrorCode ierr;
890 
891   PetscFunctionBegin;
892   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
893   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
894   PetscFunctionReturn(0);
895 }
896 
897 #undef __FUNCT__
898 #define __FUNCT__ "MatDestroy_MPIAIJ"
899 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
900 {
901   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
902   PetscErrorCode ierr;
903 
904   PetscFunctionBegin;
905 #if defined(PETSC_USE_LOG)
906   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
907 #endif
908   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
909   if (aij->diag) {ierr = VecDestroy(aij->diag);CHKERRQ(ierr);}
910   if (aij->A){ierr = MatDestroy(aij->A);CHKERRQ(ierr);}
911   if (aij->B){ierr = MatDestroy(aij->B);CHKERRQ(ierr);}
912 #if defined (PETSC_USE_CTABLE)
913   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
914 #else
915   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
916 #endif
917   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
918   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
919   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
920   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
921   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
922   ierr = PetscFree(aij);CHKERRQ(ierr);
923 
924   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
925   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
926   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
927   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
928   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
929   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
930   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
931   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
932   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
933   PetscFunctionReturn(0);
934 }
935 
936 #undef __FUNCT__
937 #define __FUNCT__ "MatView_MPIAIJ_Binary"
938 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
939 {
940   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
941   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
942   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
943   PetscErrorCode    ierr;
944   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
945   int               fd;
946   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
947   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
948   PetscScalar       *column_values;
949   PetscInt          message_count,flowcontrolcount;
950 
951   PetscFunctionBegin;
952   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
953   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
954   nz   = A->nz + B->nz;
955   if (!rank) {
956     header[0] = MAT_FILE_CLASSID;
957     header[1] = mat->rmap->N;
958     header[2] = mat->cmap->N;
959     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
960     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
961     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
962     /* get largest number of rows any processor has */
963     rlen = mat->rmap->n;
964     range = mat->rmap->range;
965     for (i=1; i<size; i++) {
966       rlen = PetscMax(rlen,range[i+1] - range[i]);
967     }
968   } else {
969     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
970     rlen = mat->rmap->n;
971   }
972 
973   /* load up the local row counts */
974   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
975   for (i=0; i<mat->rmap->n; i++) {
976     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
977   }
978 
979   /* store the row lengths to the file */
980   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
981   if (!rank) {
982     MPI_Status status;
983     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
984     for (i=1; i<size; i++) {
985       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
986       rlen = range[i+1] - range[i];
987       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
988       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
989     }
990     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
991   } else {
992     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
993     ierr = MPI_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
994     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
995   }
996   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
997 
998   /* load up the local column indices */
999   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
1000   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1001   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1002   cnt  = 0;
1003   for (i=0; i<mat->rmap->n; i++) {
1004     for (j=B->i[i]; j<B->i[i+1]; j++) {
1005       if ( (col = garray[B->j[j]]) > cstart) break;
1006       column_indices[cnt++] = col;
1007     }
1008     for (k=A->i[i]; k<A->i[i+1]; k++) {
1009       column_indices[cnt++] = A->j[k] + cstart;
1010     }
1011     for (; j<B->i[i+1]; j++) {
1012       column_indices[cnt++] = garray[B->j[j]];
1013     }
1014   }
1015   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1016 
1017   /* store the column indices to the file */
1018   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1019   if (!rank) {
1020     MPI_Status status;
1021     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1022     for (i=1; i<size; i++) {
1023       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1024       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1025       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1026       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1027       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1028     }
1029     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1030   } else {
1031     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1032     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1033     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1034     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1035   }
1036   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1037 
1038   /* load up the local column values */
1039   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1040   cnt  = 0;
1041   for (i=0; i<mat->rmap->n; i++) {
1042     for (j=B->i[i]; j<B->i[i+1]; j++) {
1043       if ( garray[B->j[j]] > cstart) break;
1044       column_values[cnt++] = B->a[j];
1045     }
1046     for (k=A->i[i]; k<A->i[i+1]; k++) {
1047       column_values[cnt++] = A->a[k];
1048     }
1049     for (; j<B->i[i+1]; j++) {
1050       column_values[cnt++] = B->a[j];
1051     }
1052   }
1053   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1054 
1055   /* store the column values to the file */
1056   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1057   if (!rank) {
1058     MPI_Status status;
1059     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1060     for (i=1; i<size; i++) {
1061       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1062       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1063       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1064       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1065       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1066     }
1067     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1068   } else {
1069     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1070     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1071     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1072     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1073   }
1074   ierr = PetscFree(column_values);CHKERRQ(ierr);
1075   PetscFunctionReturn(0);
1076 }
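
/*
   A sketch of triggering this writer (viewer name and file are hypothetical):

      PetscViewer viewer;

      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
      ierr = MatView(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   The resulting file layout is the header [MAT_FILE_CLASSID, M, N, total nz]
   followed by all row lengths, then all column indices, then all values.
*/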
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1080 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1081 {
1082   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1083   PetscErrorCode    ierr;
1084   PetscMPIInt       rank = aij->rank,size = aij->size;
1085   PetscBool         isdraw,iascii,isbinary;
1086   PetscViewer       sviewer;
1087   PetscViewerFormat format;
1088 
1089   PetscFunctionBegin;
1090   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1091   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1092   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1093   if (iascii) {
1094     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1095     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1096       MatInfo    info;
1097       PetscBool  inodes;
1098 
1099       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1100       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1101       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
1102       if (!inodes) {
1103         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1104 					      rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1105       } else {
1106         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1107 		    rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1108       }
1109       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1110       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1111       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1112       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1113       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1114       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1115       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1116       PetscFunctionReturn(0);
1117     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1118       PetscInt   inodecount,inodelimit,*inodes;
1119       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1120       if (inodes) {
1121         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1122       } else {
1123         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1124       }
1125       PetscFunctionReturn(0);
1126     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1127       PetscFunctionReturn(0);
1128     }
1129   } else if (isbinary) {
1130     if (size == 1) {
1131       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1132       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1133     } else {
1134       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1135     }
1136     PetscFunctionReturn(0);
1137   } else if (isdraw) {
1138     PetscDraw  draw;
1139     PetscBool  isnull;
1140     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1141     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1142   }
1143 
1144   if (size == 1) {
1145     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1146     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1147   } else {
1148     /* assemble the entire matrix onto first processor. */
1149     Mat         A;
1150     Mat_SeqAIJ  *Aloc;
1151     PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1152     MatScalar   *a;
1153 
1154     if (mat->rmap->N > 1024) {
1155       PetscBool  flg = PETSC_FALSE;
1156 
1157       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,PETSC_NULL);CHKERRQ(ierr);
1158       if (!flg) {
1159         SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1160       }
1161     }
1162 
1163     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1164     if (!rank) {
1165       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1166     } else {
1167       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1168     }
1169     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1170     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1171     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1172     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1173 
1174     /* copy over the A part */
1175     Aloc = (Mat_SeqAIJ*)aij->A->data;
1176     m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1177     row = mat->rmap->rstart;
1178     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap->rstart;}
1179     for (i=0; i<m; i++) {
1180       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1181       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1182     }
1183     aj = Aloc->j;
1184     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap->rstart;}
1185 
1186     /* copy over the B part */
1187     Aloc = (Mat_SeqAIJ*)aij->B->data;
1188     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1189     row  = mat->rmap->rstart;
1190     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1191     ct   = cols;
1192     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1193     for (i=0; i<m; i++) {
1194       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1195       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1196     }
1197     ierr = PetscFree(ct);CHKERRQ(ierr);
1198     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1199     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1200     /*
1201        Everyone has to call to draw the matrix since the graphics waits are
1202        synchronized across all processors that share the PetscDraw object
1203     */
1204     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1205     if (!rank) {
1206       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1207       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1208       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1209       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1210     }
1211     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1212     ierr = MatDestroy(A);CHKERRQ(ierr);
1213   }
1214   PetscFunctionReturn(0);
1215 }
1216 
1217 #undef __FUNCT__
1218 #define __FUNCT__ "MatView_MPIAIJ"
1219 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1220 {
1221   PetscErrorCode ierr;
1222   PetscBool      iascii,isdraw,issocket,isbinary;
1223 
1224   PetscFunctionBegin;
1225   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1226   ierr  = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1227   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1228   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1229   if (iascii || isdraw || isbinary || issocket) {
1230     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1231   } else {
1232     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1233   }
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 #undef __FUNCT__
1238 #define __FUNCT__ "MatSOR_MPIAIJ"
1239 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1240 {
1241   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1242   PetscErrorCode ierr;
1243   Vec            bb1 = 0;
1244   PetscBool      hasop;
1245 
1246   PetscFunctionBegin;
1247   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1248     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1249   }
1250 
1251   if (flag == SOR_APPLY_UPPER) {
1252     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1253     PetscFunctionReturn(0);
1254   }
1255 
1256   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1257     if (flag & SOR_ZERO_INITIAL_GUESS) {
1258       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1259       its--;
1260     }
1261 
1262     while (its--) {
1263       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265 
1266       /* update rhs: bb1 = bb - B*x */
1267       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1268       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1269 
1270       /* local sweep */
1271       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1272     }
1273   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1274     if (flag & SOR_ZERO_INITIAL_GUESS) {
1275       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1276       its--;
1277     }
1278     while (its--) {
1279       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1280       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1281 
1282       /* update rhs: bb1 = bb - B*x */
1283       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1284       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1285 
1286       /* local sweep */
1287       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1288     }
1289   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1290     if (flag & SOR_ZERO_INITIAL_GUESS) {
1291       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1292       its--;
1293     }
1294     while (its--) {
1295       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1296       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1297 
1298       /* update rhs: bb1 = bb - B*x */
1299       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1300       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1301 
1302       /* local sweep */
1303       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1304     }
1305   }  else if (flag & SOR_EISENSTAT) {
1306     Vec         xx1;
1307 
1308     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1309     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1310 
1311     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1312     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1313     if (!mat->diag) {
1314       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
1315       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1316     }
1317     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1318     if (hasop) {
1319       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1320     } else {
1321       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1322     }
1323     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1324 
1325     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1326 
1327     /* local sweep */
1328     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1329     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1330     ierr = VecDestroy(xx1);CHKERRQ(ierr);
1331   } else {
1332     SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR not supported");
1333   }
1334 
1335   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
1336   PetscFunctionReturn(0);
1337 }
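/*
   Illustrative usage sketch (comment only, not part of the build): one local
   symmetric SOR sweep with zero initial guess, as dispatched above. Assumes an
   assembled MATMPIAIJ matrix A and conforming vectors b and x.

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                   0.0,1,1,x);CHKERRQ(ierr);

   Only the block-Jacobi style SOR_LOCAL_* variants and Eisenstat's trick are
   supported; a true parallel SOR raises PETSC_ERR_SUP.
*/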
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatPermute_MPIAIJ"
1341 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1342 {
1343   MPI_Comm       comm,pcomm;
1344   PetscInt       first,local_size,nrows;
1345   const PetscInt *rows;
1346   PetscMPIInt    size;
1347   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1348   PetscErrorCode ierr;
1349 
1350   PetscFunctionBegin;
1351   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1352   /* make a collective version of 'rowp' */
1353   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1354   if (pcomm==comm) {
1355     crowp = rowp;
1356   } else {
1357     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1358     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1359     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
1360     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1361   }
1362   /* collect the global row permutation and invert it */
1363   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
1364   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
1365   if (pcomm!=comm) {
1366     ierr = ISDestroy(crowp);CHKERRQ(ierr);
1367   }
1368   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1369   /* get the local target indices */
1370   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
1371   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
1372   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
1373   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
1374   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
1375   ierr = ISDestroy(irowp);CHKERRQ(ierr);
1376   /* the column permutation is so much easier;
1377      make a local version of 'colp' and invert it */
1378   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
1379   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
1380   if (size==1) {
1381     lcolp = colp;
1382   } else {
1383     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
1384     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
1385     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
1386   }
1387   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
1388   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
1389   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
1390   if (size>1) {
1391     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
1392     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
1393   }
1394   /* now we just get the submatrix */
1395   ierr = MatGetSubMatrix_MPIAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
1396   /* clean up */
1397   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
1398   ierr = ISDestroy(icolp);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
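/*
   Illustrative usage sketch (comment only, not part of the build): permuting
   the rows and columns of an MPIAIJ matrix. The names rowperm and colperm are
   placeholders for index sets holding valid permutations of the global
   row/column numbering.

     Mat Aperm;
     ierr = MatPermute(A,rowperm,colperm,&Aperm);CHKERRQ(ierr);

   As the code above shows, the row permutation may live on a subcommunicator;
   it is gathered onto the matrix communicator before being applied.
*/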
1401 
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1404 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1405 {
1406   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1407   Mat            A = mat->A,B = mat->B;
1408   PetscErrorCode ierr;
1409   PetscReal      isend[5],irecv[5];
1410 
1411   PetscFunctionBegin;
1412   info->block_size     = 1.0;
1413   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1414   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1415   isend[3] = info->memory;  isend[4] = info->mallocs;
1416   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1417   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1418   isend[3] += info->memory;  isend[4] += info->mallocs;
1419   if (flag == MAT_LOCAL) {
1420     info->nz_used      = isend[0];
1421     info->nz_allocated = isend[1];
1422     info->nz_unneeded  = isend[2];
1423     info->memory       = isend[3];
1424     info->mallocs      = isend[4];
1425   } else if (flag == MAT_GLOBAL_MAX) {
1426     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1427     info->nz_used      = irecv[0];
1428     info->nz_allocated = irecv[1];
1429     info->nz_unneeded  = irecv[2];
1430     info->memory       = irecv[3];
1431     info->mallocs      = irecv[4];
1432   } else if (flag == MAT_GLOBAL_SUM) {
1433     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1434     info->nz_used      = irecv[0];
1435     info->nz_allocated = irecv[1];
1436     info->nz_unneeded  = irecv[2];
1437     info->memory       = irecv[3];
1438     info->mallocs      = irecv[4];
1439   }
1440   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1441   info->fill_ratio_needed = 0;
1442   info->factor_mallocs    = 0;
1443 
1444   PetscFunctionReturn(0);
1445 }
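/*
   Illustrative usage sketch (comment only, not part of the build): querying
   the nonzero counts of an MPIAIJ matrix summed over all processes.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",
                        info.nz_used,info.nz_allocated);CHKERRQ(ierr);

   MAT_LOCAL skips the reduction; MAT_GLOBAL_MAX takes the per-process maximum.
*/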
1446 
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatSetOption_MPIAIJ"
1449 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool  flg)
1450 {
1451   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1452   PetscErrorCode ierr;
1453 
1454   PetscFunctionBegin;
1455   switch (op) {
1456   case MAT_NEW_NONZERO_LOCATIONS:
1457   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1458   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1459   case MAT_KEEP_NONZERO_PATTERN:
1460   case MAT_NEW_NONZERO_LOCATION_ERR:
1461   case MAT_USE_INODES:
1462   case MAT_IGNORE_ZERO_ENTRIES:
1463     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1464     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1465     break;
1466   case MAT_ROW_ORIENTED:
1467     a->roworiented = flg;
1468     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1469     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1470     break;
1471   case MAT_NEW_DIAGONALS:
1472     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1473     break;
1474   case MAT_IGNORE_OFF_PROC_ENTRIES:
1475     a->donotstash = PETSC_TRUE;
1476     break;
1477   case MAT_SPD:
1478     A->spd_set                         = PETSC_TRUE;
1479     A->spd                             = flg;
1480     if (flg) {
1481       A->symmetric                     = PETSC_TRUE;
1482       A->structurally_symmetric        = PETSC_TRUE;
1483       A->symmetric_set                 = PETSC_TRUE;
1484       A->structurally_symmetric_set    = PETSC_TRUE;
1485     }
1486     break;
1487   case MAT_SYMMETRIC:
1488     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1489     break;
1490   case MAT_STRUCTURALLY_SYMMETRIC:
1491     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1492     break;
1493   case MAT_HERMITIAN:
1494     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1495     break;
1496   case MAT_SYMMETRY_ETERNAL:
1497     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1498     break;
1499   default:
1500     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1501   }
1502   PetscFunctionReturn(0);
1503 }
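/*
   Illustrative usage sketch (comment only, not part of the build): typical
   options set on an MPIAIJ matrix before assembly. Where an option applies to
   the stored blocks, it is forwarded to both local blocks as implemented above.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
*/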
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatGetRow_MPIAIJ"
1507 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1508 {
1509   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1510   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1511   PetscErrorCode ierr;
1512   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1513   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1514   PetscInt       *cmap,*idx_p;
1515 
1516   PetscFunctionBegin;
1517   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1518   mat->getrowactive = PETSC_TRUE;
1519 
1520   if (!mat->rowvalues && (idx || v)) {
1521     /*
1522         allocate enough space to hold information from the longest row.
1523     */
1524     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1525     PetscInt   max = 1,tmp;
1526     for (i=0; i<matin->rmap->n; i++) {
1527       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1528       if (max < tmp) { max = tmp; }
1529     }
1530     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1531   }
1532 
1533   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1534   lrow = row - rstart;
1535 
1536   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1537   if (!v)   {pvA = 0; pvB = 0;}
1538   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1539   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1540   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1541   nztot = nzA + nzB;
1542 
1543   cmap  = mat->garray;
1544   if (v  || idx) {
1545     if (nztot) {
1546       /* Sort by increasing column numbers, assuming A and B already sorted */
1547       PetscInt imark = -1;
1548       if (v) {
1549         *v = v_p = mat->rowvalues;
1550         for (i=0; i<nzB; i++) {
1551           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1552           else break;
1553         }
1554         imark = i;
1555         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1556         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1557       }
1558       if (idx) {
1559         *idx = idx_p = mat->rowindices;
1560         if (imark > -1) {
1561           for (i=0; i<imark; i++) {
1562             idx_p[i] = cmap[cworkB[i]];
1563           }
1564         } else {
1565           for (i=0; i<nzB; i++) {
1566             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1567             else break;
1568           }
1569           imark = i;
1570         }
1571         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1572         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1573       }
1574     } else {
1575       if (idx) *idx = 0;
1576       if (v)   *v   = 0;
1577     }
1578   }
1579   *nz = nztot;
1580   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1581   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1587 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1588 {
1589   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1590 
1591   PetscFunctionBegin;
1592   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1593   aij->getrowactive = PETSC_FALSE;
1594   PetscFunctionReturn(0);
1595 }
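/*
   Illustrative usage sketch (comment only, not part of the build): iterating
   over the locally owned rows with MatGetRow()/MatRestoreRow(). Only local
   rows may be requested, and only one row may be active at a time.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use ncols, cols (ascending global column indices), and vals ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/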
1596 
1597 #undef __FUNCT__
1598 #define __FUNCT__ "MatNorm_MPIAIJ"
1599 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1600 {
1601   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1602   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1603   PetscErrorCode ierr;
1604   PetscInt       i,j,cstart = mat->cmap->rstart;
1605   PetscReal      sum = 0.0;
1606   MatScalar      *v;
1607 
1608   PetscFunctionBegin;
1609   if (aij->size == 1) {
1610     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1611   } else {
1612     if (type == NORM_FROBENIUS) {
1613       v = amat->a;
1614       for (i=0; i<amat->nz; i++) {
1615 #if defined(PETSC_USE_COMPLEX)
1616         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1617 #else
1618         sum += (*v)*(*v); v++;
1619 #endif
1620       }
1621       v = bmat->a;
1622       for (i=0; i<bmat->nz; i++) {
1623 #if defined(PETSC_USE_COMPLEX)
1624         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1625 #else
1626         sum += (*v)*(*v); v++;
1627 #endif
1628       }
1629       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1630       *norm = sqrt(*norm);
1631     } else if (type == NORM_1) { /* max column norm */
1632       PetscReal *tmp,*tmp2;
1633       PetscInt  *jj,*garray = aij->garray;
1634       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1635       ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1636       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
1637       *norm = 0.0;
1638       v = amat->a; jj = amat->j;
1639       for (j=0; j<amat->nz; j++) {
1640         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1641       }
1642       v = bmat->a; jj = bmat->j;
1643       for (j=0; j<bmat->nz; j++) {
1644         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1645       }
1646       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1647       for (j=0; j<mat->cmap->N; j++) {
1648         if (tmp2[j] > *norm) *norm = tmp2[j];
1649       }
1650       ierr = PetscFree(tmp);CHKERRQ(ierr);
1651       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1652     } else if (type == NORM_INFINITY) { /* max row norm */
1653       PetscReal ntemp = 0.0;
1654       for (j=0; j<aij->A->rmap->n; j++) {
1655         v = amat->a + amat->i[j];
1656         sum = 0.0;
1657         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1658           sum += PetscAbsScalar(*v); v++;
1659         }
1660         v = bmat->a + bmat->i[j];
1661         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1662           sum += PetscAbsScalar(*v); v++;
1663         }
1664         if (sum > ntemp) ntemp = sum;
1665       }
1666       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1667     } else {
1668       SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for two norm");
1669     }
1670   }
1671   PetscFunctionReturn(0);
1672 }
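/*
   Illustrative usage sketch (comment only, not part of the build): the three
   norms supported above. NORM_FROBENIUS is sqrt(sum_ij |a_ij|^2), NORM_1 the
   maximum column sum of absolute values, NORM_INFINITY the maximum row sum;
   NORM_2 is not supported for AIJ matrices.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/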
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatTranspose_MPIAIJ"
1676 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1677 {
1678   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1679   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1680   PetscErrorCode ierr;
1681   PetscInt       M = A->rmap->N,N = A->cmap->N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i,*d_nnz;
1682   PetscInt       cstart=A->cmap->rstart,ncol;
1683   Mat            B;
1684   MatScalar      *array;
1685 
1686   PetscFunctionBegin;
1687   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1688 
1689   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n;
1690   ai = Aloc->i; aj = Aloc->j;
1691   bi = Bloc->i; bj = Bloc->j;
1692   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1693     /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1694     ierr = PetscMalloc((1+na)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1695     ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1696     for (i=0; i<ai[ma]; i++){
1697       d_nnz[aj[i]] ++;
1698       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1699     }
1700 
1701     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1702     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1703     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1704     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1705     ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1706   } else {
1707     B = *matout;
1708   }
1709 
1710   /* copy over the A part */
1711   array = Aloc->a;
1712   row = A->rmap->rstart;
1713   for (i=0; i<ma; i++) {
1714     ncol = ai[i+1]-ai[i];
1715     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1716     row++; array += ncol; aj += ncol;
1717   }
1718   aj = Aloc->j;
1719   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
1720 
1721   /* copy over the B part */
1722   ierr = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1723   ierr = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
1724   array = Bloc->a;
1725   row = A->rmap->rstart;
1726   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1727   cols_tmp = cols;
1728   for (i=0; i<mb; i++) {
1729     ncol = bi[i+1]-bi[i];
1730     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1731     row++; array += ncol; cols_tmp += ncol;
1732   }
1733   ierr = PetscFree(cols);CHKERRQ(ierr);
1734 
1735   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1736   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1737   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1738     *matout = B;
1739   } else {
1740     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1741   }
1742   PetscFunctionReturn(0);
1743 }
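/*
   Illustrative usage sketch (comment only, not part of the build):
   out-of-place and in-place transposes as handled above. In-place reuse
   requires a square matrix.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   new matrix
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      in place, square only
*/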
1744 
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1747 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1748 {
1749   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1750   Mat            a = aij->A,b = aij->B;
1751   PetscErrorCode ierr;
1752   PetscInt       s1,s2,s3;
1753 
1754   PetscFunctionBegin;
1755   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1756   if (rr) {
1757     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1758     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1759     /* Overlap communication with computation. */
1760     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1761   }
1762   if (ll) {
1763     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1764     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1765     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1766   }
1767   /* scale  the diagonal block */
1768   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1769 
1770   if (rr) {
1771     /* Do a scatter end and then right scale the off-diagonal block */
1772     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1773     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1774   }
1775 
1776   PetscFunctionReturn(0);
1777 }
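/*
   Illustrative usage sketch (comment only, not part of the build): computing
   diag(l) A diag(r). Assumes l conforms to the rows and r to the columns of A;
   either argument may be PETSC_NULL to skip that side.

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);

   The implementation above overlaps the scatter of r with the left scaling of
   the off-diagonal block.
*/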
1778 
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1781 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1782 {
1783   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1784   PetscErrorCode ierr;
1785 
1786   PetscFunctionBegin;
1787   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1788   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1789   ierr = PetscLayoutSetBlockSize(A->rmap,bs);CHKERRQ(ierr);
1790   ierr = PetscLayoutSetBlockSize(A->cmap,bs);CHKERRQ(ierr);
1791   PetscFunctionReturn(0);
1792 }
1793 #undef __FUNCT__
1794 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1795 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1796 {
1797   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1798   PetscErrorCode ierr;
1799 
1800   PetscFunctionBegin;
1801   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1802   PetscFunctionReturn(0);
1803 }
1804 
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatEqual_MPIAIJ"
1807 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
1808 {
1809   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1810   Mat            a,b,c,d;
1811   PetscBool      flg;
1812   PetscErrorCode ierr;
1813 
1814   PetscFunctionBegin;
1815   a = matA->A; b = matA->B;
1816   c = matB->A; d = matB->B;
1817 
1818   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1819   if (flg) {
1820     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1821   }
1822   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 #undef __FUNCT__
1827 #define __FUNCT__ "MatCopy_MPIAIJ"
1828 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1829 {
1830   PetscErrorCode ierr;
1831   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1832   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1833 
1834   PetscFunctionBegin;
1835   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1836   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1837     /* because of the column compression in the off-processor part of the matrix a->B,
1838        the number of columns in a->B and b->B may be different, hence we cannot call
1839        the MatCopy() directly on the two parts. If need be, we can provide a more
1840        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1841        then copying the submatrices */
1842     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1843   } else {
1844     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1845     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1846   }
1847   PetscFunctionReturn(0);
1848 }
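/*
   Illustrative usage sketch (comment only, not part of the build): copying
   values between two already assembled MPIAIJ matrices.

     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   With SAME_NONZERO_PATTERN (and matching copy implementations) the code
   above copies block by block; any other structure flag falls back to
   MatCopy_Basic() because of the column compression noted in the comment.
*/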
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1852 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1853 {
1854   PetscErrorCode ierr;
1855 
1856   PetscFunctionBegin;
1857   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatAXPY_MPIAIJ"
1863 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1864 {
1865   PetscErrorCode ierr;
1866   PetscInt       i;
1867   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1868   PetscBLASInt   bnz,one=1;
1869   Mat_SeqAIJ     *x,*y;
1870 
1871   PetscFunctionBegin;
1872   if (str == SAME_NONZERO_PATTERN) {
1873     PetscScalar alpha = a;
1874     x = (Mat_SeqAIJ *)xx->A->data;
1875     y = (Mat_SeqAIJ *)yy->A->data;
1876     bnz = PetscBLASIntCast(x->nz);
1877     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1878     x = (Mat_SeqAIJ *)xx->B->data;
1879     y = (Mat_SeqAIJ *)yy->B->data;
1880     bnz = PetscBLASIntCast(x->nz);
1881     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1882   } else if (str == SUBSET_NONZERO_PATTERN) {
1883     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1884 
1885     x = (Mat_SeqAIJ *)xx->B->data;
1886     y = (Mat_SeqAIJ *)yy->B->data;
1887     if (y->xtoy && y->XtoY != xx->B) {
1888       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1889       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1890     }
1891     if (!y->xtoy) { /* get xtoy */
1892       ierr = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1893       y->XtoY = xx->B;
1894       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1895     }
1896     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1897   } else {
1898     Mat B;
1899     PetscInt *nnz_d,*nnz_o;
1900     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
1901     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
1902     ierr = MatCreate(((PetscObject)Y)->comm,&B);CHKERRQ(ierr);
1903     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1904     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1905     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
1906     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1907     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->B,xx->B,nnz_o);CHKERRQ(ierr);
1908     ierr = MatMPIAIJSetPreallocation(B,PETSC_NULL,nnz_d,PETSC_NULL,nnz_o);CHKERRQ(ierr);
1909     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1910     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1911     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1912     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1913   }
1914   PetscFunctionReturn(0);
1915 }
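/*
   Illustrative usage sketch (comment only, not part of the build): Y = Y + a*X
   for two MPIAIJ matrices with identical layouts. The MatStructure flag
   selects the path above: SAME_NONZERO_PATTERN reduces to two BLAS axpy calls
   on the stored values, while DIFFERENT_NONZERO_PATTERN rebuilds Y with a
   merged preallocation.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/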
1916 
1917 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatConjugate_MPIAIJ"
1921 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1922 {
1923 #if defined(PETSC_USE_COMPLEX)
1924   PetscErrorCode ierr;
1925   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1926 
1927   PetscFunctionBegin;
1928   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1929   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1930 #else
1931   PetscFunctionBegin;
1932 #endif
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 #undef __FUNCT__
1937 #define __FUNCT__ "MatRealPart_MPIAIJ"
1938 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1939 {
1940   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1941   PetscErrorCode ierr;
1942 
1943   PetscFunctionBegin;
1944   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1945   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 #undef __FUNCT__
1950 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1951 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1952 {
1953   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1954   PetscErrorCode ierr;
1955 
1956   PetscFunctionBegin;
1957   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1958   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #ifdef PETSC_HAVE_PBGL
1963 
1964 #include <boost/parallel/mpi/bsp_process_group.hpp>
1965 #include <boost/graph/distributed/ilu_default_graph.hpp>
1966 #include <boost/graph/distributed/ilu_0_block.hpp>
1967 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1968 #include <boost/graph/distributed/petsc/interface.hpp>
1969 #include <boost/multi_array.hpp>
1970 #include <boost/parallel/distributed_property_map.hpp>
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1974 /*
1975   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1976 */
1977 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
1978 {
1979   namespace petsc = boost::distributed::petsc;
1980 
1981   namespace graph_dist = boost::graph::distributed;
1982   using boost::graph::distributed::ilu_default::process_group_type;
1983   using boost::graph::ilu_permuted;
1984 
1985   PetscBool       row_identity, col_identity;
1986   PetscContainer  c;
1987   PetscInt        m, n, M, N;
1988   PetscErrorCode  ierr;
1989 
1990   PetscFunctionBegin;
1991   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1992   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1993   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1994   if (!row_identity || !col_identity) {
1995     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1996   }
1997 
1998   process_group_type pg;
1999   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2000   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2001   lgraph_type&   level_graph = *lgraph_p;
2002   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2003 
2004   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2005   ilu_permuted(level_graph);
2006 
2007   /* put together the new matrix */
2008   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
2009   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2010   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2011   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2012   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2013   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2014   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2015 
2016   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
2017   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2018   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2024 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2025 {
2026   PetscFunctionBegin;
2027   PetscFunctionReturn(0);
2028 }
2029 
2030 #undef __FUNCT__
2031 #define __FUNCT__ "MatSolve_MPIAIJ"
2032 /*
2033   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2034 */
2035 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2036 {
2037   namespace graph_dist = boost::graph::distributed;
2038 
2039   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
2040   lgraph_type*   lgraph_p;
2041   PetscContainer c;
2042   PetscErrorCode ierr;
2043 
2044   PetscFunctionBegin;
2045   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
2046   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
2047   ierr = VecCopy(b, x);CHKERRQ(ierr);
2048 
2049   PetscScalar* array_x;
2050   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2051   PetscInt sx;
2052   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2053 
2054   PetscScalar* array_b;
2055   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2056   PetscInt sb;
2057   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2058 
2059   lgraph_type&   level_graph = *lgraph_p;
2060   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2061 
2062   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2063   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
2064                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
2065 
2066   typedef boost::iterator_property_map<array_ref_type::iterator,
2067                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2068   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
2069                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2070 
2071   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2072 
2073   PetscFunctionReturn(0);
2074 }
2075 #endif
2076 
2077 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2078   PetscInt       nzlocal,nsends,nrecvs;
2079   PetscMPIInt    *send_rank,*recv_rank;
2080   PetscInt       *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2081   PetscScalar    *sbuf_a,**rbuf_a;
2082   PetscErrorCode (*MatDestroy)(Mat);
2083 } Mat_Redundant;
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2087 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2088 {
2089   PetscErrorCode       ierr;
2090   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
2091   PetscInt             i;
2092 
2093   PetscFunctionBegin;
2094   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2095   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2096   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2097   for (i=0; i<redund->nrecvs; i++){
2098     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2099     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2100   }
2101   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2102   ierr = PetscFree(redund);CHKERRQ(ierr);
2103   PetscFunctionReturn(0);
2104 }
2105 
2106 #undef __FUNCT__
2107 #define __FUNCT__ "MatDestroy_MatRedundant"
2108 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2109 {
2110   PetscErrorCode  ierr;
2111   PetscContainer  container;
2112   Mat_Redundant   *redund=PETSC_NULL;
2113 
2114   PetscFunctionBegin;
2115   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2116   if (container) {
2117     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2118   } else {
2119     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2120   }
2121   A->ops->destroy = redund->MatDestroy;
2122   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2123   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2124   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 #undef __FUNCT__
2129 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2130 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2131 {
2132   PetscMPIInt    rank,size;
2133   MPI_Comm       comm=((PetscObject)mat)->comm;
2134   PetscErrorCode ierr;
2135   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
2136   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
2137   PetscInt       *rowrange=mat->rmap->range;
2138   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2139   Mat            A=aij->A,B=aij->B,C=*matredundant;
2140   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2141   PetscScalar    *sbuf_a;
2142   PetscInt       nzlocal=a->nz+b->nz;
2143   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2144   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2145   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2146   MatScalar      *aworkA,*aworkB;
2147   PetscScalar    *vals;
2148   PetscMPIInt    tag1,tag2,tag3,imdex;
2149   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
2150                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
2151   MPI_Status     recv_status,*send_status;
2152   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
2153   PetscInt       **rbuf_j=PETSC_NULL;
2154   PetscScalar    **rbuf_a=PETSC_NULL;
2155   Mat_Redundant  *redund=PETSC_NULL;
2156   PetscContainer container;
2157 
2158   PetscFunctionBegin;
2159   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2160   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2161 
2162   if (reuse == MAT_REUSE_MATRIX) {
2163     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2164     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2165     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2166     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2167     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
2168     if (container) {
2169       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
2170     } else {
2171       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exist");
2172     }
2173     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2174 
2175     nsends    = redund->nsends;
2176     nrecvs    = redund->nrecvs;
2177     send_rank = redund->send_rank;
2178     recv_rank = redund->recv_rank;
2179     sbuf_nz   = redund->sbuf_nz;
2180     rbuf_nz   = redund->rbuf_nz;
2181     sbuf_j    = redund->sbuf_j;
2182     sbuf_a    = redund->sbuf_a;
2183     rbuf_j    = redund->rbuf_j;
2184     rbuf_a    = redund->rbuf_a;
2185   }
2186 
2187   if (reuse == MAT_INITIAL_MATRIX){
2188     PetscMPIInt  subrank,subsize;
2189     PetscInt     nleftover,np_subcomm;
2190     /* determine the destination ranks (send_rank) and the numbers of sends and receives */
2191     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2192     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2193     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2194     np_subcomm = size/nsubcomm;
2195     nleftover  = size - nsubcomm*np_subcomm;
2196     nsends = 0; nrecvs = 0;
2197     for (i=0; i<size; i++){ /* i = global rank of the other process */
2198       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
2199         send_rank[nsends] = i; nsends++;
2200         recv_rank[nrecvs++] = i;
2201       }
2202     }
2203     if (rank >= size - nleftover){/* this proc is a leftover processor */
2204       i = size-nleftover-1;
2205       j = 0;
2206       while (j < nsubcomm - nleftover){
2207         send_rank[nsends++] = i;
2208         i--; j++;
2209       }
2210     }
2211 
2212     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
2213       for (i=0; i<nleftover; i++){
2214         recv_rank[nrecvs++] = size-nleftover+i;
2215       }
2216     }
2217 
2218     /* allocate sbuf_j, sbuf_a */
2219     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2220     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2221     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2222   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2223 
2224   /* copy mat's local entries into the buffers */
2225   if (reuse == MAT_INITIAL_MATRIX){
2226     rownz_max = 0;
2227     rptr = sbuf_j;
2228     cols = sbuf_j + rend-rstart + 1;
2229     vals = sbuf_a;
2230     rptr[0] = 0;
2231     for (i=0; i<rend-rstart; i++){
2232       row = i + rstart;
2233       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2234       ncols  = nzA + nzB;
2235       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2236       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2237       /* load the column indices for this row into cols */
2238       lwrite = 0;
2239       for (l=0; l<nzB; l++) {
2240         if ((ctmp = bmap[cworkB[l]]) < cstart){
2241           vals[lwrite]   = aworkB[l];
2242           cols[lwrite++] = ctmp;
2243         }
2244       }
2245       for (l=0; l<nzA; l++){
2246         vals[lwrite]   = aworkA[l];
2247         cols[lwrite++] = cstart + cworkA[l];
2248       }
2249       for (l=0; l<nzB; l++) {
2250         if ((ctmp = bmap[cworkB[l]]) >= cend){
2251           vals[lwrite]   = aworkB[l];
2252           cols[lwrite++] = ctmp;
2253         }
2254       }
2255       vals += ncols;
2256       cols += ncols;
2257       rptr[i+1] = rptr[i] + ncols;
2258       if (rownz_max < ncols) rownz_max = ncols;
2259     }
2260     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2261   } else { /* only copy matrix values into sbuf_a */
2262     rptr = sbuf_j;
2263     vals = sbuf_a;
2264     rptr[0] = 0;
2265     for (i=0; i<rend-rstart; i++){
2266       row = i + rstart;
2267       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2268       ncols  = nzA + nzB;
2269       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2270       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2271       lwrite = 0;
2272       for (l=0; l<nzB; l++) {
2273         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2274       }
2275       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2276       for (l=0; l<nzB; l++) {
2277         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2278       }
2279       vals += ncols;
2280       rptr[i+1] = rptr[i] + ncols;
2281     }
2282   } /* end of if/else (reuse == MAT_INITIAL_MATRIX) */
2283 
2284   /* send nzlocal to others, and recv other's nzlocal */
2285   /*--------------------------------------------------*/
2286   if (reuse == MAT_INITIAL_MATRIX){
2287     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2288     s_waits2 = s_waits3 + nsends;
2289     s_waits1 = s_waits2 + nsends;
2290     r_waits1 = s_waits1 + nsends;
2291     r_waits2 = r_waits1 + nrecvs;
2292     r_waits3 = r_waits2 + nrecvs;
2293   } else {
2294     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2295     r_waits3 = s_waits3 + nsends;
2296   }
2297 
2298   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2299   if (reuse == MAT_INITIAL_MATRIX){
2300     /* get new tags to keep the communication clean */
2301     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2303     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2304 
2305     /* post receives of other's nzlocal */
2306     for (i=0; i<nrecvs; i++){
2307       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2308     }
2309     /* send nzlocal to others */
2310     for (i=0; i<nsends; i++){
2311       sbuf_nz[i] = nzlocal;
2312       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2313     }
2314     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2315     count = nrecvs;
2316     while (count) {
2317       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2318       recv_rank[imdex] = recv_status.MPI_SOURCE;
2319       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2320       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2321 
2322       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2323       rbuf_nz[imdex] += i + 2;
2324       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2325       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2326       count--;
2327     }
2328     /* wait on sends of nzlocal */
2329     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2330     /* send mat->i,j to others, and recv from other's */
2331     /*------------------------------------------------*/
2332     for (i=0; i<nsends; i++){
2333       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2334       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2335     }
2336     /* wait on receives of mat->i,j */
2337     /*------------------------------*/
2338     count = nrecvs;
2339     while (count) {
2340       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2341       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2342       count--;
2343     }
2344     /* wait on sends of mat->i,j */
2345     /*---------------------------*/
2346     if (nsends) {
2347       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2348     }
2349   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2350 
2351   /* post receives, send and receive mat->a */
2352   /*----------------------------------------*/
2353   for (imdex=0; imdex<nrecvs; imdex++) {
2354     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2355   }
2356   for (i=0; i<nsends; i++){
2357     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2358   }
2359   count = nrecvs;
2360   while (count) {
2361     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2362     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2363     count--;
2364   }
2365   if (nsends) {
2366     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2367   }
2368 
2369   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2370 
2371   /* create redundant matrix */
2372   /*-------------------------*/
2373   if (reuse == MAT_INITIAL_MATRIX){
2374     /* compute rownz_max for preallocation */
2375     for (imdex=0; imdex<nrecvs; imdex++){
2376       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2377       rptr = rbuf_j[imdex];
2378       for (i=0; i<j; i++){
2379         ncols = rptr[i+1] - rptr[i];
2380         if (rownz_max < ncols) rownz_max = ncols;
2381       }
2382     }
2383 
2384     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2385     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2386     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2387     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2388     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2389   } else {
2390     C = *matredundant;
2391   }
2392 
2393   /* insert local matrix entries */
2394   rptr = sbuf_j;
2395   cols = sbuf_j + rend-rstart + 1;
2396   vals = sbuf_a;
2397   for (i=0; i<rend-rstart; i++){
2398     row   = i + rstart;
2399     ncols = rptr[i+1] - rptr[i];
2400     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2401     vals += ncols;
2402     cols += ncols;
2403   }
2404   /* insert received matrix entries */
2405   for (imdex=0; imdex<nrecvs; imdex++){
2406     rstart = rowrange[recv_rank[imdex]];
2407     rend   = rowrange[recv_rank[imdex]+1];
2408     rptr = rbuf_j[imdex];
2409     cols = rbuf_j[imdex] + rend-rstart + 1;
2410     vals = rbuf_a[imdex];
2411     for (i=0; i<rend-rstart; i++){
2412       row   = i + rstart;
2413       ncols = rptr[i+1] - rptr[i];
2414       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2415       vals += ncols;
2416       cols += ncols;
2417     }
2418   }
2419   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2420   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2421   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2422   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
2423   if (reuse == MAT_INITIAL_MATRIX){
2424     PetscContainer container;
2425     *matredundant = C;
2426     /* create a supporting struct and attach it to C for reuse */
2427     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2428     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2429     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2430     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2431     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2432 
2433     redund->nzlocal = nzlocal;
2434     redund->nsends  = nsends;
2435     redund->nrecvs  = nrecvs;
2436     redund->send_rank = send_rank;
2437     redund->recv_rank = recv_rank;
2438     redund->sbuf_nz = sbuf_nz;
2439     redund->rbuf_nz = rbuf_nz;
2440     redund->sbuf_j  = sbuf_j;
2441     redund->sbuf_a  = sbuf_a;
2442     redund->rbuf_j  = rbuf_j;
2443     redund->rbuf_a  = rbuf_a;
2444 
2445     redund->MatDestroy = C->ops->destroy;
2446     C->ops->destroy    = MatDestroy_MatRedundant;
2447   }
2448   PetscFunctionReturn(0);
2449 }
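/*
   Illustrative usage sketch (comment only, not part of the build): gathering a
   complete copy of 'mat' on each of nsubcomm subcommunicators. Assumes
   'subcomm' was built beforehand (e.g. via PetscSubcomm) and that mlocal_sub
   is the local size chosen for the redundant matrix; both names here are
   placeholders.

     Mat Cred;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
     ... later, after the values of mat change but not its nonzero pattern ...
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);

   The Mat_Redundant container composed on Cred caches the communication
   buffers, so the MAT_REUSE_MATRIX path only ships the numerical values.
*/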
2450 
2451 #undef __FUNCT__
2452 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2453 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2454 {
2455   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2456   PetscErrorCode ierr;
2457   PetscInt       i,*idxb = 0;
2458   PetscScalar    *va,*vb;
2459   Vec            vtmp;
2460 
2461   PetscFunctionBegin;
2462   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2463   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2464   if (idx) {
2465     for (i=0; i<A->rmap->n; i++) {
2466       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2467     }
2468   }
2469 
2470   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2471   if (idx) {
2472     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2473   }
2474   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2475   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2476 
2477   for (i=0; i<A->rmap->n; i++){
2478     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2479       va[i] = vb[i];
2480       if (idx) idx[i] = a->garray[idxb[i]];
2481     }
2482   }
2483 
2484   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2485   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2486   if (idxb) {
2487     ierr = PetscFree(idxb);CHKERRQ(ierr);
2488   }
2489   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2490   PetscFunctionReturn(0);
2491 }
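/*
   Illustrative usage sketch (comment only, not part of the build): the largest
   entry in magnitude of each locally owned row, with its global column index.
   The names rmax, loc and m (the number of local rows) are placeholders.

     Vec      rmax;
     PetscInt *loc;
     ierr = MatGetVecs(A,PETSC_NULL,&rmax);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&loc);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rmax,loc);CHKERRQ(ierr);
*/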
2492 
2493 #undef __FUNCT__
2494 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2495 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2496 {
2497   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2498   PetscErrorCode ierr;
2499   PetscInt       i,*idxb = 0;
2500   PetscScalar    *va,*vb;
2501   Vec            vtmp;
2502 
2503   PetscFunctionBegin;
2504   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2505   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2506   if (idx) {
2507     for (i=0; i<A->rmap->n; i++) { /* loop over the locally owned rows */
2508       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2509     }
2510   }
2511 
2512   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2513   if (idx) {
2514     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
2515   }
2516   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2517   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2518 
2519   for (i=0; i<A->rmap->n; i++){
2520     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2521       va[i] = vb[i];
2522       if (idx) idx[i] = a->garray[idxb[i]];
2523     }
2524   }
2525 
2526   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2527   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2528   if (idxb) {
2529     ierr = PetscFree(idxb);CHKERRQ(ierr);
2530   }
2531   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 #undef __FUNCT__
2536 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2537 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2538 {
2539   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2540   PetscInt       n      = A->rmap->n;
2541   PetscInt       cstart = A->cmap->rstart;
2542   PetscInt      *cmap   = mat->garray;
2543   PetscInt      *diagIdx, *offdiagIdx;
2544   Vec            diagV, offdiagV;
2545   PetscScalar   *a, *diagA, *offdiagA;
2546   PetscInt       r;
2547   PetscErrorCode ierr;
2548 
2549   PetscFunctionBegin;
2550   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2551   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* per-process work vectors, hence sequential */
2552   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2553   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2554   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2555   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2556   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2557   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2558   for(r = 0; r < n; ++r) {
2559     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2560       a[r]   = diagA[r];
2561       idx[r] = cstart + diagIdx[r];
2562     } else {
2563       a[r]   = offdiagA[r];
2564       idx[r] = cmap[offdiagIdx[r]];
2565     }
2566   }
2567   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2568   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2569   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2570   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2571   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2572   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 #undef __FUNCT__
2577 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2578 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2579 {
2580   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2581   PetscInt       n      = A->rmap->n;
2582   PetscInt       cstart = A->cmap->rstart;
2583   PetscInt      *cmap   = mat->garray;
2584   PetscInt      *diagIdx, *offdiagIdx;
2585   Vec            diagV, offdiagV;
2586   PetscScalar   *a, *diagA, *offdiagA;
2587   PetscInt       r;
2588   PetscErrorCode ierr;
2589 
2590   PetscFunctionBegin;
2591   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2592   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* per-process work vectors, hence sequential */
2593   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2594   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2595   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2596   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2597   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2598   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2599   for(r = 0; r < n; ++r) {
2600     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2601       a[r]   = diagA[r];
2602       idx[r] = cstart + diagIdx[r];
2603     } else {
2604       a[r]   = offdiagA[r];
2605       idx[r] = cmap[offdiagIdx[r]];
2606     }
2607   }
2608   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2609   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2610   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2611   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2612   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2613   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2614   PetscFunctionReturn(0);
2615 }
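
/*
   A user-level usage sketch (illustrative only; assumes A is an assembled MATMPIAIJ
   matrix with m local rows -- MatGetRowMax()/MatGetRowMin() dispatch to the routines
   above):

     Vec      v;
     PetscInt *idx;
     ierr = VecCreateMPI(((PetscObject)A)->comm,m,PETSC_DETERMINE,&v);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
     ierr = MatGetRowMax(A,v,idx);CHKERRQ(ierr);
     v[i] then holds the extreme value of global row i, and idx[i] its global column.
*/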
2616 
2617 #undef __FUNCT__
2618 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2619 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat)
2620 {
2621   PetscErrorCode ierr;
2622   Mat            *dummy;
2623 
2624   PetscFunctionBegin;
2625   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2626   *newmat = *dummy;
2627   ierr = PetscFree(dummy);CHKERRQ(ierr);
2628   PetscFunctionReturn(0);
2629 }
2630 
2631 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2632 /* -------------------------------------------------------------------*/
2633 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2634        MatGetRow_MPIAIJ,
2635        MatRestoreRow_MPIAIJ,
2636        MatMult_MPIAIJ,
2637 /* 4*/ MatMultAdd_MPIAIJ,
2638        MatMultTranspose_MPIAIJ,
2639        MatMultTransposeAdd_MPIAIJ,
2640 #ifdef PETSC_HAVE_PBGL
2641        MatSolve_MPIAIJ,
2642 #else
2643        0,
2644 #endif
2645        0,
2646        0,
2647 /*10*/ 0,
2648        0,
2649        0,
2650        MatSOR_MPIAIJ,
2651        MatTranspose_MPIAIJ,
2652 /*15*/ MatGetInfo_MPIAIJ,
2653        MatEqual_MPIAIJ,
2654        MatGetDiagonal_MPIAIJ,
2655        MatDiagonalScale_MPIAIJ,
2656        MatNorm_MPIAIJ,
2657 /*20*/ MatAssemblyBegin_MPIAIJ,
2658        MatAssemblyEnd_MPIAIJ,
2659        MatSetOption_MPIAIJ,
2660        MatZeroEntries_MPIAIJ,
2661 /*24*/ MatZeroRows_MPIAIJ,
2662        0,
2663 #ifdef PETSC_HAVE_PBGL
2664        0,
2665 #else
2666        0,
2667 #endif
2668        0,
2669        0,
2670 /*29*/ MatSetUpPreallocation_MPIAIJ,
2671 #ifdef PETSC_HAVE_PBGL
2672        0,
2673 #else
2674        0,
2675 #endif
2676        0,
2677        0,
2678        0,
2679 /*34*/ MatDuplicate_MPIAIJ,
2680        0,
2681        0,
2682        0,
2683        0,
2684 /*39*/ MatAXPY_MPIAIJ,
2685        MatGetSubMatrices_MPIAIJ,
2686        MatIncreaseOverlap_MPIAIJ,
2687        MatGetValues_MPIAIJ,
2688        MatCopy_MPIAIJ,
2689 /*44*/ MatGetRowMax_MPIAIJ,
2690        MatScale_MPIAIJ,
2691        0,
2692        0,
2693        0,
2694 /*49*/ MatSetBlockSize_MPIAIJ,
2695        0,
2696        0,
2697        0,
2698        0,
2699 /*54*/ MatFDColoringCreate_MPIAIJ,
2700        0,
2701        MatSetUnfactored_MPIAIJ,
2702        MatPermute_MPIAIJ,
2703        0,
2704 /*59*/ MatGetSubMatrix_MPIAIJ,
2705        MatDestroy_MPIAIJ,
2706        MatView_MPIAIJ,
2707        0,
2708        0,
2709 /*64*/ 0,
2710        0,
2711        0,
2712        0,
2713        0,
2714 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2715        MatGetRowMinAbs_MPIAIJ,
2716        0,
2717        MatSetColoring_MPIAIJ,
2718 #if defined(PETSC_HAVE_ADIC)
2719        MatSetValuesAdic_MPIAIJ,
2720 #else
2721        0,
2722 #endif
2723        MatSetValuesAdifor_MPIAIJ,
2724 /*75*/ MatFDColoringApply_AIJ,
2725        0,
2726        0,
2727        0,
2728        0,
2729 /*80*/ 0,
2730        0,
2731        0,
2732 /*83*/ MatLoad_MPIAIJ,
2733        0,
2734        0,
2735        0,
2736        0,
2737        0,
2738 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
2739        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2740        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2741        MatPtAP_Basic,
2742        MatPtAPSymbolic_MPIAIJ,
2743 /*94*/ MatPtAPNumeric_MPIAIJ,
2744        0,
2745        0,
2746        0,
2747        0,
2748 /*99*/ 0,
2749        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2750        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2751        MatConjugate_MPIAIJ,
2752        0,
2753 /*104*/MatSetValuesRow_MPIAIJ,
2754        MatRealPart_MPIAIJ,
2755        MatImaginaryPart_MPIAIJ,
2756        0,
2757        0,
2758 /*109*/0,
2759        MatGetRedundantMatrix_MPIAIJ,
2760        MatGetRowMin_MPIAIJ,
2761        0,
2762        0,
2763 /*114*/MatGetSeqNonzerostructure_MPIAIJ,
2764        0,
2765        0,
2766        0,
2767        0,
2768 /*119*/0,
2769        0,
2770        0,
2771        0,
2772        MatGetMultiProcBlock_MPIAIJ
2773 };
2774 
2775 /* ----------------------------------------------------------------------------------------*/
2776 
2777 EXTERN_C_BEGIN
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2780 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2781 {
2782   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2783   PetscErrorCode ierr;
2784 
2785   PetscFunctionBegin;
2786   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2787   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2788   PetscFunctionReturn(0);
2789 }
2790 EXTERN_C_END
2791 
2792 EXTERN_C_BEGIN
2793 #undef __FUNCT__
2794 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2795 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2796 {
2797   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2798   PetscErrorCode ierr;
2799 
2800   PetscFunctionBegin;
2801   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2802   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2803   PetscFunctionReturn(0);
2804 }
2805 EXTERN_C_END
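
/*
   A minimal usage sketch of the store/retrieve cycle (assumes mat is assembled and
   that new nonzero locations have been disallowed, which MatStoreValues() requires):

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... overwrite the numerical values of mat, keeping the nonzero pattern ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/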
2806 
2807 EXTERN_C_BEGIN
2808 #undef __FUNCT__
2809 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2810 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2811 {
2812   Mat_MPIAIJ     *b;
2813   PetscErrorCode ierr;
2814   PetscInt       i;
2815 
2816   PetscFunctionBegin;
2817   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2818   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2819   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2820   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2821 
2822   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
2823   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
2824   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2825   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2826   if (d_nnz) {
2827     for (i=0; i<B->rmap->n; i++) {
2828       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2829     }
2830   }
2831   if (o_nnz) {
2832     for (i=0; i<B->rmap->n; i++) {
2833       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2834     }
2835   }
2836   b = (Mat_MPIAIJ*)B->data;
2837 
2838   if (!B->preallocated) {
2839     /* Explicitly create 2 MATSEQAIJ matrices. */
2840     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2841     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2842     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2843     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2844     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2845     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2846     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2847     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2848   }
2849 
2850   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2851   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2852   B->preallocated = PETSC_TRUE;
2853   PetscFunctionReturn(0);
2854 }
2855 EXTERN_C_END
2856 
2857 #undef __FUNCT__
2858 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2859 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2860 {
2861   Mat            mat;
2862   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2863   PetscErrorCode ierr;
2864 
2865   PetscFunctionBegin;
2866   *newmat       = 0;
2867   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2868   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2869   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2870   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2871   a    = (Mat_MPIAIJ*)mat->data;
2872 
2873   mat->factortype    = matin->factortype;
2874   mat->rmap->bs      = matin->rmap->bs;
2875   mat->assembled    = PETSC_TRUE;
2876   mat->insertmode   = NOT_SET_VALUES;
2877   mat->preallocated = PETSC_TRUE;
2878 
2879   a->size           = oldmat->size;
2880   a->rank           = oldmat->rank;
2881   a->donotstash     = oldmat->donotstash;
2882   a->roworiented    = oldmat->roworiented;
2883   a->rowindices     = 0;
2884   a->rowvalues      = 0;
2885   a->getrowactive   = PETSC_FALSE;
2886 
2887   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2888   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2889 
2890   if (oldmat->colmap) {
2891 #if defined (PETSC_USE_CTABLE)
2892     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2893 #else
2894     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2895     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2896     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2897 #endif
2898   } else a->colmap = 0;
2899   if (oldmat->garray) {
2900     PetscInt len;
2901     len  = oldmat->B->cmap->n;
2902     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2903     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2904     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2905   } else a->garray = 0;
2906 
2907   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2908   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2909   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2910   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2911   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2912   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2913   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2914   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2915   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2916   *newmat = mat;
2917   PetscFunctionReturn(0);
2918 }
2919 
2920 /*
2921     Allows sending/receiving messages larger than 2 gigabytes in a single call
2922 */
2923 static int MPILong_Send(void *mess,PetscInt cnt, MPI_Datatype type,int to, int tag, MPI_Comm comm)
2924 {
2925   int             ierr;
2926   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2927   PetscInt        i,numchunks;
2928   PetscMPIInt     icnt;
2929 
2930   numchunks = cnt/CHUNKSIZE + 1;
2931   for (i=0; i<numchunks; i++) {
2932     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2933     ierr = MPI_Send(mess,icnt,type,to,tag,comm);if (ierr) return ierr;
2934     if (type == MPIU_INT) {
2935       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2936     } else if (type == MPIU_SCALAR) {
2937       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2938     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2939   }
2940   return 0;
2941 }
2942 static int MPILong_Recv(void *mess,PetscInt cnt, MPI_Datatype type,int from, int tag, MPI_Comm comm)
2943 {
2944   int             ierr;
2945   static PetscInt CHUNKSIZE = 250000000; /* 250,000,000 */
2946   MPI_Status      status;
2947   PetscInt        i,numchunks;
2948   PetscMPIInt     icnt;
2949 
2950   numchunks = cnt/CHUNKSIZE + 1;
2951   for (i=0; i<numchunks; i++) {
2952     icnt = PetscMPIIntCast((i < numchunks-1) ? CHUNKSIZE : cnt - (numchunks-1)*CHUNKSIZE);
2953     ierr = MPI_Recv(mess,icnt,type,from,tag,comm,&status);if (ierr) return ierr;
2954     if (type == MPIU_INT) {
2955       mess = (void*) (((PetscInt*)mess) + CHUNKSIZE);
2956     } else if (type == MPIU_SCALAR) {
2957       mess = (void*) (((PetscScalar*)mess) + CHUNKSIZE);
2958     } else SETERRQ(comm,PETSC_ERR_SUP,"No support for this datatype");
2959   }
2960   return 0;
2961 }
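
/*
   A paired-usage sketch (hypothetical buffer and ranks, for illustration): sender and
   receiver must agree on cnt, and each chunked MPI_Send() above is matched by the
   corresponding MPI_Recv(), so one logical message can exceed the 2^31-1 element
   limit of a single MPI call.

     if (!rank) {
       ierr = MPILong_Send(buf,cnt,MPIU_SCALAR,1,tag,comm);CHKERRQ(ierr);
     } else if (rank == 1) {
       ierr = MPILong_Recv(buf,cnt,MPIU_SCALAR,0,tag,comm);CHKERRQ(ierr);
     }
*/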
2962 
2963 #undef __FUNCT__
2964 #define __FUNCT__ "MatLoad_MPIAIJ"
2965 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2966 {
2967   PetscScalar    *vals,*svals;
2968   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2969   PetscErrorCode ierr;
2970   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
2971   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
2972   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2973   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2974   PetscInt       cend,cstart,n,*rowners,sizesset=1;
2975   int            fd;
2976 
2977   PetscFunctionBegin;
2978   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2979   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2980   if (!rank) {
2981     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2982     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2983     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2984   }
2985 
2986   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
2987 
2988   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2989   M = header[1]; N = header[2];
2990   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2991   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
2992   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
2993 
2994   /* If global sizes are set, check if they are consistent with that given in the file */
2995   if (sizesset) {
2996     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
2997   }
2998   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2999   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3000 
3001   /* determine ownership of all rows */
3002   if (newMat->rmap->n < 0 ) m    = M/size + ((M % size) > rank); /* PETSC_DECIDE */
3003   else m = newMat->rmap->n; /* Set by user */
3004 
3005   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3006   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3007 
3008   /* First process needs enough room for process with most rows */
3009   if (!rank) {
3010     mmax       = rowners[1];
3011     for (i=2; i<=size; i++) {  /* rowners[size] still holds the last process's row count here */
3012       mmax = PetscMax(mmax,rowners[i]);
3013     }
3014   } else mmax = m;
3015 
3016   rowners[0] = 0;
3017   for (i=2; i<=size; i++) {
3018     rowners[i] += rowners[i-1];
3019   }
3020   rstart = rowners[rank];
3021   rend   = rowners[rank+1];
3022 
3023   /* distribute row lengths to all processors */
3024   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
3025   if (!rank) {
3026     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3027     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3028     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3029     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3030     for (j=0; j<m; j++) {
3031       procsnz[0] += ourlens[j];
3032     }
3033     for (i=1; i<size; i++) {
3034       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3035       /* calculate the number of nonzeros on each processor */
3036       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3037         procsnz[i] += rowlengths[j];
3038       }
3039       ierr = MPILong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3040     }
3041     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3042   } else {
3043     ierr = MPILong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3044   }
3045 
3046   if (!rank) {
3047     /* determine max buffer needed and allocate it */
3048     maxnz = 0;
3049     for (i=0; i<size; i++) {
3050       maxnz = PetscMax(maxnz,procsnz[i]);
3051     }
3052     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3053 
3054     /* read in my part of the matrix column indices  */
3055     nz   = procsnz[0];
3056     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3057     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3058 
3059     /* read in everyone else's and ship off */
3060     for (i=1; i<size; i++) {
3061       nz     = procsnz[i];
3062       ierr   = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3063       ierr   = MPILong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3064     }
3065     ierr = PetscFree(cols);CHKERRQ(ierr);
3066   } else {
3067     /* determine buffer space needed for message */
3068     nz = 0;
3069     for (i=0; i<m; i++) {
3070       nz += ourlens[i];
3071     }
3072     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3073 
3074     /* receive message of column indices */
3075     ierr = MPILong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3076   }
3077 
3078   /* determine column ownership if matrix is not square */
3079   if (N != M) {
3080     if (newMat->cmap->n < 0) n      = N/size + ((N % size) > rank);
3081     else n = newMat->cmap->n;
3082     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3083     cstart = cend - n;
3084   } else {
3085     cstart = rstart;
3086     cend   = rend;
3087     n      = cend - cstart;
3088   }
3089 
3090   /* loop over local rows, determining number of off diagonal entries */
3091   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3092   jj = 0;
3093   for (i=0; i<m; i++) {
3094     for (j=0; j<ourlens[i]; j++) {
3095       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3096       jj++;
3097     }
3098   }
3099 
3100   for (i=0; i<m; i++) {
3101     ourlens[i] -= offlens[i];
3102   }
3103   if (!sizesset) {
3104     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3105   }
3106   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3107 
3108   for (i=0; i<m; i++) {
3109     ourlens[i] += offlens[i];
3110   }
3111 
3112   if (!rank) {
3113     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3114 
3115     /* read in my part of the matrix numerical values  */
3116     nz   = procsnz[0];
3117     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3118 
3119     /* insert into matrix */
3120     jj      = rstart;
3121     smycols = mycols;
3122     svals   = vals;
3123     for (i=0; i<m; i++) {
3124       ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3125       smycols += ourlens[i];
3126       svals   += ourlens[i];
3127       jj++;
3128     }
3129 
3130     /* read in other processors and ship out */
3131     for (i=1; i<size; i++) {
3132       nz     = procsnz[i];
3133       ierr   = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3134       ierr   = MPILong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3135     }
3136     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3137   } else {
3138     /* receive numeric values */
3139     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3140 
3141     /* receive message of values */
3142     ierr   = MPILong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3143 
3144     /* insert into matrix */
3145     jj      = rstart;
3146     smycols = mycols;
3147     svals   = vals;
3148     for (i=0; i<m; i++) {
3149       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3150       smycols += ourlens[i];
3151       svals   += ourlens[i];
3152       jj++;
3153     }
3154   }
3155   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3156   ierr = PetscFree(vals);CHKERRQ(ierr);
3157   ierr = PetscFree(mycols);CHKERRQ(ierr);
3158   ierr = PetscFree(rowners);CHKERRQ(ierr);
3159 
3160   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3161   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3162   PetscFunctionReturn(0);
3163 }
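
/*
   A minimal loading sketch (assumes the MatLoad(Mat,PetscViewer) calling sequence
   used by this implementation; "matrix.dat" is a placeholder file name):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/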
3164 
3165 #undef __FUNCT__
3166 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3167 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3168 {
3169   PetscErrorCode ierr;
3170   IS             iscol_local;
3171   PetscInt       csize;
3172 
3173   PetscFunctionBegin;
3174   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3175   if (call == MAT_REUSE_MATRIX) {
3176     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3177     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3178   } else {
3179     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3180   }
3181   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3182   if (call == MAT_INITIAL_MATRIX) {
3183     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3184     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
3185   }
3186   PetscFunctionReturn(0);
3187 }
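
/*
   A usage sketch (hypothetical index sets): pass MAT_INITIAL_MATRIX on the first
   call and MAT_REUSE_MATRIX afterwards, so the "ISAllGather" object composed above
   is reused rather than rebuilt.

     IS  isrow,iscol;
     Mat sub;
     ierr = ISCreateStride(comm,nloc,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(comm,nloc,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... change the values (not the nonzero pattern) of A ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/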
3188 
3189 #undef __FUNCT__
3190 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3191 /*
3192     Not great since it makes two copies of the submatrix: first a SeqAIJ piece
3193   on each process, and then the end result built by concatenating the local
3194   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3195 
3196   Note: This requires a sequential iscol containing all of the indices.
3197 */
3198 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3199 {
3200   PetscErrorCode ierr;
3201   PetscMPIInt    rank,size;
3202   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
3203   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3204   Mat            *local,M,Mreuse;
3205   MatScalar      *vwork,*aa;
3206   MPI_Comm       comm = ((PetscObject)mat)->comm;
3207   Mat_SeqAIJ     *aij;
3208 
3209 
3210   PetscFunctionBegin;
3211   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3212   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3213 
3214   if (call ==  MAT_REUSE_MATRIX) {
3215     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
3216     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3217     local = &Mreuse;
3218     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
3219   } else {
3220     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
3221     Mreuse = *local;
3222     ierr   = PetscFree(local);CHKERRQ(ierr);
3223   }
3224 
3225   /*
3226       m - number of local rows
3227       n - number of columns (same on all processors)
3228       rstart - first row in new global matrix generated
3229   */
3230   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3231   if (call == MAT_INITIAL_MATRIX) {
3232     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3233     ii  = aij->i;
3234     jj  = aij->j;
3235 
3236     /*
3237         Determine the number of non-zeros in the diagonal and off-diagonal
3238         portions of the matrix in order to do correct preallocation
3239     */
3240 
3241     /* first get start and end of "diagonal" columns */
3242     if (csize == PETSC_DECIDE) {
3243       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3244       if (mglobal == n) { /* square matrix */
3245 	nlocal = m;
3246       } else {
3247         nlocal = n/size + ((n % size) > rank);
3248       }
3249     } else {
3250       nlocal = csize;
3251     }
3252     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3253     rstart = rend - nlocal;
3254     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3255 
3256     /* next, compute all the lengths */
3257     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
3258     olens = dlens + m;
3259     for (i=0; i<m; i++) {
3260       jend = ii[i+1] - ii[i];
3261       olen = 0;
3262       dlen = 0;
3263       for (j=0; j<jend; j++) {
3264         if (*jj < rstart || *jj >= rend) olen++;
3265         else dlen++;
3266         jj++;
3267       }
3268       olens[i] = olen;
3269       dlens[i] = dlen;
3270     }
3271     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3272     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3273     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3274     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3275     ierr = PetscFree(dlens);CHKERRQ(ierr);
3276   } else {
3277     PetscInt ml,nl;
3278 
3279     M = *newmat;
3280     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3281     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3282     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3283     /*
3284          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3285        rather than the slower MatSetValues().
3286     */
3287     M->was_assembled = PETSC_TRUE;
3288     M->assembled     = PETSC_FALSE;
3289   }
3290   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3291   aij = (Mat_SeqAIJ*)(Mreuse)->data;
3292   ii  = aij->i;
3293   jj  = aij->j;
3294   aa  = aij->a;
3295   for (i=0; i<m; i++) {
3296     row   = rstart + i;
3297     nz    = ii[i+1] - ii[i];
3298     cwork = jj;     jj += nz;
3299     vwork = aa;     aa += nz;
3300     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3301   }
3302 
3303   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3304   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3305   *newmat = M;
3306 
3307   /* save submatrix used in processor for next request */
3308   if (call ==  MAT_INITIAL_MATRIX) {
3309     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3310     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
3311   }
3312 
3313   PetscFunctionReturn(0);
3314 }
3315 
3316 EXTERN_C_BEGIN
3317 #undef __FUNCT__
3318 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3319 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3320 {
3321   PetscInt       m,cstart, cend,j,nnz,i,d;
3322   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3323   const PetscInt *JJ;
3324   PetscScalar    *values;
3325   PetscErrorCode ierr;
3326 
3327   PetscFunctionBegin;
3328   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3329 
3330   ierr = PetscLayoutSetBlockSize(B->rmap,1);CHKERRQ(ierr);
3331   ierr = PetscLayoutSetBlockSize(B->cmap,1);CHKERRQ(ierr);
3332   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3333   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3334   m      = B->rmap->n;
3335   cstart = B->cmap->rstart;
3336   cend   = B->cmap->rend;
3337   rstart = B->rmap->rstart;
3338 
3339   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3340 
3341 #if defined(PETSC_USE_DEBUG)
3342   for (i=0; i<m; i++) {
3343     nnz     = Ii[i+1]- Ii[i];
3344     JJ      = J + Ii[i];
3345     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3346     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3347     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3348   }
3349 #endif
3350 
3351   for (i=0; i<m; i++) {
3352     nnz     = Ii[i+1]- Ii[i];
3353     JJ      = J + Ii[i];
3354     nnz_max = PetscMax(nnz_max,nnz);
3355     d       = 0;
3356     for (j=0; j<nnz; j++) {
3357       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3358     }
3359     d_nnz[i] = d;
3360     o_nnz[i] = nnz - d;
3361   }
3362   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3363   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3364 
3365   if (v) values = (PetscScalar*)v;
3366   else {
3367     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3368     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3369   }
3370 
3371   for (i=0; i<m; i++) {
3372     ii   = i + rstart;
3373     nnz  = Ii[i+1]- Ii[i];
3374     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3375   }
3376   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3377   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3378 
3379   if (!v) {
3380     ierr = PetscFree(values);CHKERRQ(ierr);
3381   }
3382   PetscFunctionReturn(0);
3383 }
3384 EXTERN_C_END
3385 
3386 #undef __FUNCT__
3387 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3388 /*@
3389    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3390    (the default parallel PETSc format).
3391 
3392    Collective on MPI_Comm
3393 
3394    Input Parameters:
3395 +  B - the matrix
3396 .  i - the indices into j for the start of each local row (starts with zero)
3397 .  j - the column indices for each local row (starts with zero)
3398 -  v - optional values in the matrix
3399 
3400    Level: developer
3401 
3402    Notes:
3403        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3404      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3405      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3406 
3407        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3408        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3409        The format used for the sparse matrix input is equivalent to a
3410     row-major ordering, i.e., for the following matrix, the expected input data
3411     is as shown:
3412 
3413         1 0 0
3414         2 0 3     P0
3415        -------
3416         4 5 6     P1
3417 
3418      Process0 [P0]: rows_owned=[0,1]
3419         i =  {0,1,3}  [size = nrow+1  = 2+1]
3420         j =  {0,0,2}  [size = nz = 3]
3421         v =  {1,2,3}  [size = nz = 3]
3422 
3423      Process1 [P1]: rows_owned=[2]
3424         i =  {0,3}    [size = nrow+1  = 1+1]
3425         j =  {0,1,2}  [size = nz = 3]
3426         v =  {4,5,6}  [size = nz = 3]
3427 
3428 .keywords: matrix, aij, compressed row, sparse, parallel
3429 
3430 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
3431           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3432 @*/
3433 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3434 {
3435   PetscErrorCode ierr;
3436 
3437   PetscFunctionBegin;
3438   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3439   PetscFunctionReturn(0);
3440 }
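
/*
   A concrete calling-sequence sketch for the 3x3 example in the manual page above,
   as seen from process 0 of a two-process run (process 0 owns rows 0 and 1):

     PetscInt    i[] = {0,1,3};          row pointers, nrow+1 entries
     PetscInt    j[] = {0,0,2};          global column indices, nz = 3 entries
     PetscScalar v[] = {1.0,2.0,3.0};    values, nz = 3 entries
     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/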
3441 
3442 #undef __FUNCT__
3443 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3444 /*@C
3445    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3446    (the default parallel PETSc format).  For good matrix assembly performance
3447    the user should preallocate the matrix storage by setting the parameters
3448    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3449    performance can be increased by more than a factor of 50.
3450 
3451    Collective on MPI_Comm
3452 
3453    Input Parameters:
3454 +  A - the matrix
3455 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3456            (same value is used for all local rows)
3457 .  d_nnz - array containing the number of nonzeros in the various rows of the
3458            DIAGONAL portion of the local submatrix (possibly different for each row)
3459            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3460            The size of this array is equal to the number of local rows, i.e., 'm'.
3461            You must leave room for the diagonal entry even if it is zero.
3462 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3463            submatrix (same value is used for all local rows).
3464 -  o_nnz - array containing the number of nonzeros in the various rows of the
3465            OFF-DIAGONAL portion of the local submatrix (possibly different for
3466            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3467            structure. The size of this array is equal to the number
3468            of local rows, i.e., 'm'.
3469 
3470    If the *_nnz parameter is given then the *_nz parameter is ignored
3471 
3472    The AIJ format (also called the Yale sparse matrix format or
3473    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3474    storage.  The stored row and column indices begin with zero.
3475    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3476 
3477    The parallel matrix is partitioned such that the first m0 rows belong to
3478    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3479    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3480 
3481    The DIAGONAL portion of the local submatrix of a processor can be defined
3482    as the submatrix which is obtained by extracting the part corresponding to
3483    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3484    first row that belongs to the processor, r2 is the last row belonging to
3485    this processor, and c1-c2 is the range of indices of the local part of a
3486    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3487    common case of a square matrix, the row and column ranges are the same and
3488    the DIAGONAL part is also square. The remaining portion of the local
3489    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3490 
3491    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3492 
3493    You can call MatGetInfo() to get information on how effective the preallocation was;
3494    for example, see the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3495    You can also run with the option -info and look for messages with the string
3496    malloc in them to see if additional memory allocation was needed.
3497 
3498    Example usage:
3499 
3500    Consider the following 8x8 matrix with 34 non-zero values, that is
3501    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3502    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3503    as follows:
3504 
3505 .vb
3506             1  2  0  |  0  3  0  |  0  4
3507     Proc0   0  5  6  |  7  0  0  |  8  0
3508             9  0 10  | 11  0  0  | 12  0
3509     -------------------------------------
3510            13  0 14  | 15 16 17  |  0  0
3511     Proc1   0 18  0  | 19 20 21  |  0  0
3512             0  0  0  | 22 23  0  | 24  0
3513     -------------------------------------
3514     Proc2  25 26 27  |  0  0 28  | 29  0
3515            30  0  0  | 31 32 33  |  0 34
3516 .ve
3517 
3518    This can be represented as a collection of submatrices as:
3519 
3520 .vb
3521       A B C
3522       D E F
3523       G H I
3524 .ve
3525 
3526    Where the submatrices A,B,C are owned by proc0, D,E,F are
3527    owned by proc1, G,H,I are owned by proc2.
3528 
3529    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3530    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3531    The 'M','N' parameters are 8,8, and have the same values on all procs.
3532 
3533    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3534    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3535    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3536    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3537    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3538    matrix, and [DF] as another SeqAIJ matrix.
3539 
3540    When d_nz, o_nz parameters are specified, d_nz storage elements are
3541    allocated for every row of the local diagonal submatrix, and o_nz
3542    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3543    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3544    the local rows of each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3545    In this case, the values of d_nz,o_nz are:
3546 .vb
3547      proc0 : dnz = 2, o_nz = 2
3548      proc1 : dnz = 3, o_nz = 2
3549      proc2 : dnz = 1, o_nz = 4
3550 .ve
3551    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3552    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3553    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3554    34 values.
3555 
3556    When d_nnz, o_nnz parameters are specified, the storage is specified
3557    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3558    In the above case the values for d_nnz,o_nnz are:
3559 .vb
3560      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3561      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3562      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3563 .ve
3564    Here the space allocated is the sum of all the above values, i.e., 34, and
3565    hence the preallocation is perfect.
3566 
3567    Level: intermediate
3568 
3569 .keywords: matrix, aij, compressed row, sparse, parallel
3570 
3571 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3572           MPIAIJ, MatGetInfo()
3573 @*/
3574 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3575 {
3576   PetscErrorCode ierr;
3577 
3578   PetscFunctionBegin;
3579   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3580   PetscFunctionReturn(0);
3581 }
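
/*
   A sketch of the d_nnz/o_nnz form for process 0 of the 8x8 example above
   (3 local rows; the per-row counts are the ones listed in the manual page):

     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/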
3582 
3583 #undef __FUNCT__
3584 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3585 /*@
3586      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3587          CSR format the local rows.
3588 
3589    Collective on MPI_Comm
3590 
3591    Input Parameters:
3592 +  comm - MPI communicator
3593 .  m - number of local rows (Cannot be PETSC_DECIDE)
3594 .  n - This value should be the same as the local size used in creating the
3595        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3596        calculated if N is given) For square matrices n is almost always m.
3597 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3598 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3599 .   i - row indices
3600 .   j - column indices
3601 -   a - matrix values
3602 
3603    Output Parameter:
3604 .   mat - the matrix
3605 
3606    Level: intermediate
3607 
3608    Notes:
3609        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3610      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3611      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3612 
3613        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3614        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3615        The format used for the sparse matrix input is equivalent to a
3616     row-major ordering, i.e., for the following matrix, the expected input data
3617     is as shown:
3618 
3619         1 0 0
3620         2 0 3     P0
3621        -------
3622         4 5 6     P1
3623 
3624      Process0 [P0]: rows_owned=[0,1]
3625         i =  {0,1,3}  [size = nrow+1  = 2+1]
3626         j =  {0,0,2}  [size = nz = 3]
3627         v =  {1,2,3}  [size = nz = 3]
3628 
3629      Process1 [P1]: rows_owned=[2]
3630         i =  {0,3}    [size = nrow+1  = 1+1]
3631         j =  {0,1,2}  [size = nz = 3]
3632         v =  {4,5,6}  [size = nz = 3]
3633 
3634 .keywords: matrix, aij, compressed row, sparse, parallel
3635 
3636 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3637           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3638 @*/
3639 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3640 {
3641   PetscErrorCode ierr;
3642 
3643   PetscFunctionBegin;
3644   if (i[0]) {
3645     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3646   }
3647   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3648   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3649   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3650   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3651   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3652   PetscFunctionReturn(0);
3653 }
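
/*
   A sketch for process 0 of the example in the manual page above (rows 0 and 1 of
   the 3x3 matrix, two-process run; the arrays are copied, so they may be freed or
   modified after the call):

     PetscInt    i[] = {0,1,3},j[] = {0,0,2};
     PetscScalar a[] = {1.0,2.0,3.0};
     Mat         mat;
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&mat);CHKERRQ(ierr);
*/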
3654 
3655 #undef __FUNCT__
3656 #define __FUNCT__ "MatCreateMPIAIJ"
3657 /*@C
3658    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3659    (the default parallel PETSc format).  For good matrix assembly performance
3660    the user should preallocate the matrix storage by setting the parameters
3661    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3662    performance can be increased by more than a factor of 50.
3663 
3664    Collective on MPI_Comm
3665 
3666    Input Parameters:
3667 +  comm - MPI communicator
3668 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3669            This value should be the same as the local size used in creating the
3670            y vector for the matrix-vector product y = Ax.
3671 .  n - This value should be the same as the local size used in creating the
3672        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3673        calculated if N is given) For square matrices n is almost always m.
3674 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3675 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3676 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3677            (same value is used for all local rows)
3678 .  d_nnz - array containing the number of nonzeros in the various rows of the
3679            DIAGONAL portion of the local submatrix (possibly different for each row)
3680            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3681            The size of this array is equal to the number of local rows, i.e., 'm'.
3682            You must leave room for the diagonal entry even if it is zero.
3683 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3684            submatrix (same value is used for all local rows).
3685 -  o_nnz - array containing the number of nonzeros in the various rows of the
3686            OFF-DIAGONAL portion of the local submatrix (possibly different for
3687            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3688            structure. The size of this array is equal to the number
3689            of local rows, i.e., 'm'.
3690 
3691    Output Parameter:
3692 .  A - the matrix
3693 
3694    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3695    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3696    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3697 
3698    Notes:
3699    If the *_nnz parameter is given then the *_nz parameter is ignored
3700 
3701    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3702    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3703    storage requirements for this matrix.
3704 
3705    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3706    processor then it must be used on all processors that share the object for
3707    that argument.
3708 
3709    The user MUST specify either the local or global matrix dimensions
3710    (possibly both).
3711 
3712    The parallel matrix is partitioned across processors such that the
3713    first m0 rows belong to process 0, the next m1 rows belong to
3714    process 1, the next m2 rows belong to process 2 etc.. where
3715    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
3716    values corresponding to an [m x N] submatrix.
3717 
3718    The columns are logically partitioned with the n0 columns belonging
3719    to 0th partition, the next n1 columns belonging to the next
3720    partition etc., where n0,n1,n2... are the input parameter 'n'.
3721 
3722    The DIAGONAL portion of the local submatrix on any given processor
3723    is the submatrix corresponding to the rows and columns m,n
3724    corresponding to the given processor, i.e., the diagonal matrix on
3725    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3726    etc. The remaining portion of the local submatrix [m x (N-n)]
3727    constitutes the OFF-DIAGONAL portion. The example below better
3728    illustrates this concept.
3729 
3730    For a square global matrix we define each processor's diagonal portion
3731    to be its local rows and the corresponding columns (a square submatrix);
3732    each processor's off-diagonal portion encompasses the remainder of the
3733    local matrix (a rectangular submatrix).
3734 
3735    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3736 
3737    When calling this routine with a single process communicator, a matrix of
3738    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3739    type of communicator, use the construction mechanism:
3740      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
3741 
3742    By default, this format uses inodes (identical nodes) when possible.
3743    We search for consecutive rows with the same nonzero structure, thereby
3744    reusing matrix information to achieve increased efficiency.
3745 
3746    Options Database Keys:
3747 +  -mat_no_inode  - Do not use inodes
3748 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3749 -  -mat_aij_oneindex - Internally use indexing starting at 1
3750         rather than 0.  Note that when calling MatSetValues(),
3751         the user still MUST index entries starting at 0!
3752 
3753 
3754    Example usage:
3755 
3756    Consider the following 8x8 matrix with 34 non-zero values, that is
3757    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3758    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3759    as follows:
3760 
3761 .vb
3762             1  2  0  |  0  3  0  |  0  4
3763     Proc0   0  5  6  |  7  0  0  |  8  0
3764             9  0 10  | 11  0  0  | 12  0
3765     -------------------------------------
3766            13  0 14  | 15 16 17  |  0  0
3767     Proc1   0 18  0  | 19 20 21  |  0  0
3768             0  0  0  | 22 23  0  | 24  0
3769     -------------------------------------
3770     Proc2  25 26 27  |  0  0 28  | 29  0
3771            30  0  0  | 31 32 33  |  0 34
3772 .ve
3773 
3774    This can be represented as a collection of submatrices as:
3775 
3776 .vb
3777       A B C
3778       D E F
3779       G H I
3780 .ve
3781 
3782    Where the submatrices A,B,C are owned by proc0, D,E,F are
3783    owned by proc1, G,H,I are owned by proc2.
3784 
3785    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3786    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3787    The 'M','N' parameters are 8,8, and have the same values on all procs.
3788 
3789    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3790    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3791    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3792    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3793    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3794    matrix, and [DF] as another SeqAIJ matrix.
3795 
3796    When d_nz, o_nz parameters are specified, d_nz storage elements are
3797    allocated for every row of the local diagonal submatrix, and o_nz
3798    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3799    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3800    the local rows of each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3801    In this case, the values of d_nz,o_nz are:
3802 .vb
3803      proc0 : dnz = 2, o_nz = 2
3804      proc1 : dnz = 3, o_nz = 2
3805      proc2 : dnz = 1, o_nz = 4
3806 .ve
3807    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3808    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3809    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3810    34 values.
3811 
3812    When d_nnz, o_nnz parameters are specified, the storage is specified
3813    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3814    In the above case the values for d_nnz,o_nnz are:
3815 .vb
3816      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3817      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3818      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3819 .ve
3820    Here the space allocated is the sum of all the above values, i.e., 34, and
3821    hence the preallocation is perfect.
3822 
3823    Level: intermediate
3824 
3825 .keywords: matrix, aij, compressed row, sparse, parallel
3826 
3827 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3828           MPIAIJ, MatCreateMPIAIJWithArrays()
3829 @*/
3830 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3831 {
3832   PetscErrorCode ierr;
3833   PetscMPIInt    size;
3834 
3835   PetscFunctionBegin;
3836   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3837   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3838   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3839   if (size > 1) {
3840     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3841     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3842   } else {
3843     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3844     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3845   }
3846   PetscFunctionReturn(0);
3847 }
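
/*
   The recommended equivalent construction (see the note in the manual page above),
   which yields a MATMPIAIJ even on a single-process communicator:

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
*/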
3848 
3849 #undef __FUNCT__
3850 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3851 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3852 {
3853   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3854 
3855   PetscFunctionBegin;
3856   *Ad     = a->A;
3857   *Ao     = a->B;
3858   *colmap = a->garray;
3859   PetscFunctionReturn(0);
3860 }
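
/*
   A usage sketch (the returned Ad, Ao, and colmap are borrowed references owned by
   A and must not be destroyed or freed by the caller):

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
     colmap[k] gives the global column of local column k of the off-diagonal block Ao.
*/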
3861 
3862 #undef __FUNCT__
3863 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3864 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3865 {
3866   PetscErrorCode ierr;
3867   PetscInt       i;
3868   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3869 
3870   PetscFunctionBegin;
3871   if (coloring->ctype == IS_COLORING_GLOBAL) {
3872     ISColoringValue *allcolors,*colors;
3873     ISColoring      ocoloring;
3874 
3875     /* set coloring for diagonal portion */
3876     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3877 
3878     /* set coloring for off-diagonal portion */
3879     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3880     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3881     for (i=0; i<a->B->cmap->n; i++) {
3882       colors[i] = allcolors[a->garray[i]];
3883     }
3884     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3885     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3886     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3887     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3888   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3889     ISColoringValue *colors;
3890     PetscInt        *larray;
3891     ISColoring      ocoloring;
3892 
3893     /* set coloring for diagonal portion */
3894     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3895     for (i=0; i<a->A->cmap->n; i++) {
3896       larray[i] = i + A->cmap->rstart;
3897     }
3898     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3899     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3900     for (i=0; i<a->A->cmap->n; i++) {
3901       colors[i] = coloring->colors[larray[i]];
3902     }
3903     ierr = PetscFree(larray);CHKERRQ(ierr);
3904     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3905     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3906     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3907 
3908     /* set coloring for off-diagonal portion */
3909     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3910     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3911     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3912     for (i=0; i<a->B->cmap->n; i++) {
3913       colors[i] = coloring->colors[larray[i]];
3914     }
3915     ierr = PetscFree(larray);CHKERRQ(ierr);
3916     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
3917     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3918     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3919   } else {
3920     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3921   }
3922 
3923   PetscFunctionReturn(0);
3924 }
3925 
3926 #if defined(PETSC_HAVE_ADIC)
3927 #undef __FUNCT__
3928 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3929 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3930 {
3931   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3932   PetscErrorCode ierr;
3933 
3934   PetscFunctionBegin;
3935   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3936   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3937   PetscFunctionReturn(0);
3938 }
3939 #endif
3940 
3941 #undef __FUNCT__
3942 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3943 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3944 {
3945   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3946   PetscErrorCode ierr;
3947 
3948   PetscFunctionBegin;
3949   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3950   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3951   PetscFunctionReturn(0);
3952 }
3953 
3954 #undef __FUNCT__
3955 #define __FUNCT__ "MatMerge"
3956 /*@
3957       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
3958                  matrices from each processor
3959 
3960     Collective on MPI_Comm
3961 
3962    Input Parameters:
3963 +    comm - the communicator the parallel matrix will live on
3964 .    inmat - the input sequential matrix (one per process)
3965 .    n - number of local columns (or PETSC_DECIDE)
3966 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3967 
3968    Output Parameter:
3969 .    outmat - the parallel matrix generated
3970 
3971     Level: advanced
3972 
3973    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3974           The input matrix inmat is destroyed once the parallel matrix has been assembled.
3974 
3975 @*/
3976 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3977 {
3978   PetscErrorCode ierr;
3979   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3980   PetscInt       *indx;
3981   PetscScalar    *values;
3982 
3983   PetscFunctionBegin;
3984   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3985   if (scall == MAT_INITIAL_MATRIX){
3986     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3987     if (n == PETSC_DECIDE){
3988       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3989     }
3990     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3991     rstart -= m;
3992 
3993     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3994     for (i=0;i<m;i++) {
3995       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3996       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3997       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3998     }
3999     /* This routine will ONLY return an MPIAIJ type matrix */
4000     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4001     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4002     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4003     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4004     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4005 
4006   } else if (scall == MAT_REUSE_MATRIX){
4007     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
4008   } else {
4009     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4010   }
4011 
4012   for (i=0;i<m;i++) {
4013     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4014     Ii    = i + rstart;
4015     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4016     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4017   }
4018   ierr = MatDestroy(inmat);CHKERRQ(ierr);
4019   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4020   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4021 
4022   PetscFunctionReturn(0);
4023 }
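
/*
   Usage sketch for MatMerge() (an illustrative fragment, not part of the library):
   each process passes its own assembled SeqAIJ piece, all with the same number of
   columns, and receives the stacked parallel matrix back.

     Mat seq,par;                         ... seq assembled separately on each process ...

     ierr = MatMerge(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
     ... use par; note that seq has been destroyed by MatMerge() ...

   With MAT_REUSE_MATRIX an already created par is refilled; each call again destroys
   the sequential matrix passed in.
*/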
4024 
4025 #undef __FUNCT__
4026 #define __FUNCT__ "MatFileSplit"
4027 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4028 {
4029   PetscErrorCode    ierr;
4030   PetscMPIInt       rank;
4031   PetscInt          m,N,i,rstart,nnz;
4032   size_t            len;
4033   const PetscInt    *indx;
4034   PetscViewer       out;
4035   char              *name;
4036   Mat               B;
4037   const PetscScalar *values;
4038 
4039   PetscFunctionBegin;
4040   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4041   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4042   /* Should this be the type of the diagonal block of A? */
4043   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4044   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4045   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4046   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
4047   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4048   for (i=0;i<m;i++) {
4049     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4050     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4051     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4052   }
4053   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4054   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4055 
4056   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
4057   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4058   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4059   sprintf(name,"%s.%d",outfile,rank);
4060   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4061   ierr = PetscFree(name);CHKERRQ(ierr);
4062   ierr = MatView(B,out);CHKERRQ(ierr);
4063   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
4064   ierr = MatDestroy(B);CHKERRQ(ierr);
4065   PetscFunctionReturn(0);
4066 }
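
/*
   Usage sketch for MatFileSplit() (illustrative; "mymat" is a placeholder file name):
   each process writes its local rows, as a SeqAIJ matrix, to its own binary file.

     char outfile[] = "mymat";

     ierr = MatFileSplit(A,outfile);CHKERRQ(ierr);    rank 0 writes mymat.0, rank 1 mymat.1, ...
*/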
4067 
4068 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
4069 #undef __FUNCT__
4070 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4071 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4072 {
4073   PetscErrorCode       ierr;
4074   Mat_Merge_SeqsToMPI  *merge;
4075   PetscContainer       container;
4076 
4077   PetscFunctionBegin;
4078   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4079   if (container) {
4080     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4081     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4082     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4083     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4084     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4085     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4086     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4087     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4088     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4089     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4090     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4091     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4092     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4093     ierr = PetscLayoutDestroy(merge->rowmap);CHKERRQ(ierr);
4094 
4095     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
4096     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4097   }
4098   ierr = PetscFree(merge);CHKERRQ(ierr);
4099 
4100   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4101   PetscFunctionReturn(0);
4102 }
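
/*
   The compose/query pattern used above, in miniature (a sketch; MyCtx and the key
   string "MyKey" are hypothetical placeholders): a context struct is wrapped in a
   PetscContainer and attached to the matrix.

     PetscContainer c;
     MyCtx          *ctx;

     ierr = PetscNew(MyCtx,&ctx);CHKERRQ(ierr);
     ierr = PetscContainerCreate(PETSC_COMM_SELF,&c);CHKERRQ(ierr);
     ierr = PetscContainerSetPointer(c,ctx);CHKERRQ(ierr);
     ierr = PetscObjectCompose((PetscObject)A,"MyKey",(PetscObject)c);CHKERRQ(ierr);

   and later, typically in a custom destroy routine such as the one above:

     ierr = PetscObjectQuery((PetscObject)A,"MyKey",(PetscObject*)&c);CHKERRQ(ierr);
     if (c) {ierr = PetscContainerGetPointer(c,(void**)&ctx);CHKERRQ(ierr);}
*/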
4103 
4104 #include "../src/mat/utils/freespace.h"
4105 #include "petscbt.h"
4106 
4107 #undef __FUNCT__
4108 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
4109 /*@C
4110       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by summing the sequential
4111                  matrices from each process
4112 
4113     Collective on MPI_Comm
4114 
4115    Input Parameters:
4116 +    comm - the communicator the parallel matrix will live on
4117 .    seqmat - the input sequential matrix (each process provides its own)
4118 .    m - number of local rows (or PETSC_DECIDE)
4119 .    n - number of local columns (or PETSC_DECIDE)
4120 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4121 
4122    Output Parameter:
4123 .    mpimat - the parallel matrix generated
4124 
4125     Level: advanced
4126 
4127    Notes:
4128      The dimensions of the sequential matrix on each process MUST be the same.
4129      The input seqmat is stored inside the container "Mat_Merge_SeqsToMPI", and is
4130      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4131 @*/
4132 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
4133 {
4134   PetscErrorCode       ierr;
4135   MPI_Comm             comm=((PetscObject)mpimat)->comm;
4136   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4137   PetscMPIInt          size,rank,taga,*len_s;
4138   PetscInt             N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj=a->j;
4139   PetscInt             proc,m;
4140   PetscInt             **buf_ri,**buf_rj;
4141   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4142   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
4143   MPI_Request          *s_waits,*r_waits;
4144   MPI_Status           *status;
4145   MatScalar            *aa=a->a;
4146   MatScalar            **abuf_r,*ba_i;
4147   Mat_Merge_SeqsToMPI  *merge;
4148   PetscContainer       container;
4149 
4150   PetscFunctionBegin;
4151   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4152 
4153   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4154   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4155 
4156   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
4157   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have an attached MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
4158   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
4160   bi     = merge->bi;
4161   bj     = merge->bj;
4162   buf_ri = merge->buf_ri;
4163   buf_rj = merge->buf_rj;
4164 
4165   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4166   owners = merge->rowmap->range;
4167   len_s  = merge->len_s;
4168 
4169   /* send and recv matrix values */
4170   /*-----------------------------*/
4171   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4172   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4173 
4174   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4175   for (proc=0,k=0; proc<size; proc++){
4176     if (!len_s[proc]) continue;
4177     i = owners[proc];
4178     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4179     k++;
4180   }
4181 
4182   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4183   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4184   ierr = PetscFree(status);CHKERRQ(ierr);
4185 
4186   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4187   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4188 
4189   /* insert mat values of mpimat */
4190   /*----------------------------*/
4191   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4192   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4193 
4194   for (k=0; k<merge->nrecv; k++){
4195     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4196     nrows = *(buf_ri_k[k]);
4197     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4198     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4199   }
4200 
4201   /* set values of ba */
4202   m = merge->rowmap->n;
4203   for (i=0; i<m; i++) {
4204     arow = owners[rank] + i;
4205     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4206     bnzi = bi[i+1] - bi[i];
4207     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4208 
4209     /* add local non-zero vals of this proc's seqmat into ba */
4210     anzi = ai[arow+1] - ai[arow];
4211     aj   = a->j + ai[arow];
4212     aa   = a->a + ai[arow];
4213     nextaj = 0;
4214     for (j=0; nextaj<anzi; j++){
4215       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4216         ba_i[j] += aa[nextaj++];
4217       }
4218     }
4219 
4220     /* add received vals into ba */
4221     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4222       /* i-th row */
4223       if (i == *nextrow[k]) {
4224         anzi = *(nextai[k]+1) - *nextai[k];
4225         aj   = buf_rj[k] + *(nextai[k]);
4226         aa   = abuf_r[k] + *(nextai[k]);
4227         nextaj = 0;
4228         for (j=0; nextaj<anzi; j++){
4229           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
4230             ba_i[j] += aa[nextaj++];
4231           }
4232         }
4233         nextrow[k]++; nextai[k]++;
4234       }
4235     }
4236     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4237   }
4238   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4239   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4240 
4241   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4242   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4243   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4244   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4245   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4246   PetscFunctionReturn(0);
4247 }
4248 
4249 #undef __FUNCT__
4250 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
4251 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4252 {
4253   PetscErrorCode       ierr;
4254   Mat                  B_mpi;
4255   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
4256   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4257   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
4258   PetscInt             M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4259   PetscInt             len,proc,*dnz,*onz;
4260   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4261   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4262   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
4263   MPI_Status           *status;
4264   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
4265   PetscBT              lnkbt;
4266   Mat_Merge_SeqsToMPI  *merge;
4267   PetscContainer       container;
4268 
4269   PetscFunctionBegin;
4270   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4271 
4272   /* make sure it is a PETSc comm */
4273   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
4274   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4275   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4276 
4277   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
4278   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4279 
4280   /* determine row ownership */
4281   /*---------------------------------------------------------*/
4282   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4283   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4284   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4285   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4286   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4287   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
4288   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
4289 
4290   m      = merge->rowmap->n;
4291   M      = merge->rowmap->N;
4292   owners = merge->rowmap->range;
4293 
4294   /* determine the number of messages to send, their lengths */
4295   /*---------------------------------------------------------*/
4296   len_s  = merge->len_s;
4297 
4298   len = 0;  /* length of buf_si[] */
4299   merge->nsend = 0;
4300   for (proc=0; proc<size; proc++){
4301     len_si[proc] = 0;
4302     if (proc == rank){
4303       len_s[proc] = 0;
4304     } else {
4305       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4306       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4307     }
4308     if (len_s[proc]) {
4309       merge->nsend++;
4310       nrows = 0;
4311       for (i=owners[proc]; i<owners[proc+1]; i++){
4312         if (ai[i+1] > ai[i]) nrows++;
4313       }
4314       len_si[proc] = 2*(nrows+1);
4315       len += len_si[proc];
4316     }
4317   }
4318 
4319   /* determine the number and length of messages to receive for ij-structure */
4320   /*-------------------------------------------------------------------------*/
4321   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4322   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4323 
4324   /* post the Irecv of j-structure */
4325   /*-------------------------------*/
4326   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4327   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4328 
4329   /* post the Isend of j-structure */
4330   /*--------------------------------*/
4331   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
4332 
4333   for (proc=0, k=0; proc<size; proc++){
4334     if (!len_s[proc]) continue;
4335     i = owners[proc];
4336     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4337     k++;
4338   }
4339 
4340   /* receives and sends of j-structure are complete */
4341   /*------------------------------------------------*/
4342   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4343   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4344 
4345   /* send and recv i-structure */
4346   /*---------------------------*/
4347   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4348   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4349 
4350   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
4351   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4352   for (proc=0,k=0; proc<size; proc++){
4353     if (!len_s[proc]) continue;
4354     /* form outgoing message for i-structure:
4355          buf_si[0]:                 nrows to be sent
4356                [1:nrows]:           row index (global)
4357                [nrows+1:2*nrows+1]: i-structure index
4358     */
4359     /*-------------------------------------------*/
4360     nrows = len_si[proc]/2 - 1;
4361     buf_si_i    = buf_si + nrows+1;
4362     buf_si[0]   = nrows;
4363     buf_si_i[0] = 0;
4364     nrows = 0;
4365     for (i=owners[proc]; i<owners[proc+1]; i++){
4366       anzi = ai[i+1] - ai[i];
4367       if (anzi) {
4368         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4369         buf_si[nrows+1] = i-owners[proc]; /* local row index */
4370         nrows++;
4371       }
4372     }
4373     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4374     k++;
4375     buf_si += len_si[proc];
4376   }
4377 
4378   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4379   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4380 
4381   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4382   for (i=0; i<merge->nrecv; i++){
4383     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4384   }
4385 
4386   ierr = PetscFree(len_si);CHKERRQ(ierr);
4387   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4388   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4389   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4390   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4391   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4392   ierr = PetscFree(status);CHKERRQ(ierr);
4393 
4394   /* compute a local seq matrix in each processor */
4395   /*----------------------------------------------*/
4396   /* allocate bi array and free space for accumulating nonzero column info */
4397   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
4398   bi[0] = 0;
4399 
4400   /* create and initialize a linked list */
4401   nlnk = N+1;
4402   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4403 
4404   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4405   len  = ai[owners[rank+1]] - ai[owners[rank]];
4407   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4408   current_space = free_space;
4409 
4410   /* determine symbolic info for each local row */
4411   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4412 
4413   for (k=0; k<merge->nrecv; k++){
4414     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4415     nrows = *buf_ri_k[k];
4416     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4417     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
4418   }
4419 
4420   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4421   len = 0;
4422   for (i=0;i<m;i++) {
4423     bnzi   = 0;
4424     /* add local non-zero cols of this proc's seqmat into lnk */
4425     arow   = owners[rank] + i;
4426     anzi   = ai[arow+1] - ai[arow];
4427     aj     = a->j + ai[arow];
4428     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4429     bnzi += nlnk;
4430     /* add received col data into lnk */
4431     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
4432       if (i == *nextrow[k]) { /* i-th row */
4433         anzi = *(nextai[k]+1) - *nextai[k];
4434         aj   = buf_rj[k] + *nextai[k];
4435         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4436         bnzi += nlnk;
4437         nextrow[k]++; nextai[k]++;
4438       }
4439     }
4440     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4441 
4442     /* if free space is not available, make more free space */
4443     if (current_space->local_remaining<bnzi) {
4444       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4445       nspacedouble++;
4446     }
4447     /* copy data into free space, then initialize lnk */
4448     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4449     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4450 
4451     current_space->array           += bnzi;
4452     current_space->local_used      += bnzi;
4453     current_space->local_remaining -= bnzi;
4454 
4455     bi[i+1] = bi[i] + bnzi;
4456   }
4457 
4458   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4459 
4460   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
4461   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4462   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4463 
4464   /* create symbolic parallel matrix B_mpi */
4465   /*---------------------------------------*/
4466   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4467   if (n==PETSC_DECIDE) {
4468     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4469   } else {
4470     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4471   }
4472   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4473   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4474   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4475 
4476   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
4477   B_mpi->assembled     = PETSC_FALSE;
4478   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
4479   merge->bi            = bi;
4480   merge->bj            = bj;
4481   merge->buf_ri        = buf_ri;
4482   merge->buf_rj        = buf_rj;
4483   merge->coi           = PETSC_NULL;
4484   merge->coj           = PETSC_NULL;
4485   merge->owners_co     = PETSC_NULL;
4486 
4487   /* attach the supporting struct to B_mpi for reuse */
4488   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4489   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4490   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4491   *mpimat = B_mpi;
4492 
4493   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4494   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4495   PetscFunctionReturn(0);
4496 }
4497 
4498 #undef __FUNCT__
4499 #define __FUNCT__ "MatMerge_SeqsToMPI"
4500 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4501 {
4502   PetscErrorCode   ierr;
4503 
4504   PetscFunctionBegin;
4505   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4506   if (scall == MAT_INITIAL_MATRIX){
4507     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4508   }
4509   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4510   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4511   PetscFunctionReturn(0);
4512 }
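
/*
   Reuse sketch for MatMerge_SeqsToMPI() (illustrative): the symbolic phase runs only
   for MAT_INITIAL_MATRIX; when the nonzero pattern of seqmat is unchanged, only the
   numeric phase is repeated.

     Mat mpimat;

     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ... update the numerical values of seqmat, keeping the same nonzero pattern ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
*/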
4513 
4514 #undef __FUNCT__
4515 #define __FUNCT__ "MatGetLocalMat"
4516 /*@
4517      MatGetLocalMat - Creates a SeqAIJ matrix from all the local rows of a parallel AIJ matrix
4518 
4519     Not Collective
4520 
4521    Input Parameters:
4522 +    A - the matrix
4523 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4524 
4525    Output Parameter:
4526 .    A_loc - the local sequential matrix generated
4527 
4528     Level: developer
4529 
4530 @*/
4531 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4532 {
4533   PetscErrorCode  ierr;
4534   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4535   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4536   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4537   MatScalar       *aa=a->a,*ba=b->a,*cam;
4538   PetscScalar     *ca;
4539   PetscInt        am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4540   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4541 
4542   PetscFunctionBegin;
4543   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4544   if (scall == MAT_INITIAL_MATRIX){
4545     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4546     ci[0] = 0;
4547     for (i=0; i<am; i++){
4548       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4549     }
4550     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4551     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4552     k = 0;
4553     for (i=0; i<am; i++) {
4554       ncols_o = bi[i+1] - bi[i];
4555       ncols_d = ai[i+1] - ai[i];
4556       /* off-diagonal portion of A */
4557       for (jo=0; jo<ncols_o; jo++) {
4558         col = cmap[*bj];
4559         if (col >= cstart) break;
4560         cj[k]   = col; bj++;
4561         ca[k++] = *ba++;
4562       }
4563       /* diagonal portion of A */
4564       for (j=0; j<ncols_d; j++) {
4565         cj[k]   = cstart + *aj++;
4566         ca[k++] = *aa++;
4567       }
4568       /* off-diagonal portion of A */
4569       for (j=jo; j<ncols_o; j++) {
4570         cj[k]   = cmap[*bj++];
4571         ca[k++] = *ba++;
4572       }
4573     }
4574     /* put together the new matrix */
4575     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4576     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4577     /* Since these are PETSc arrays, change flags to free them as necessary. */
4578     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4579     mat->free_a  = PETSC_TRUE;
4580     mat->free_ij = PETSC_TRUE;
4581     mat->nonew   = 0;
4582   } else if (scall == MAT_REUSE_MATRIX){
4583     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4584     ci = mat->i; cj = mat->j; cam = mat->a;
4585     for (i=0; i<am; i++) {
4586       /* off-diagonal portion of A */
4587       ncols_o = bi[i+1] - bi[i];
4588       for (jo=0; jo<ncols_o; jo++) {
4589         col = cmap[*bj];
4590         if (col >= cstart) break;
4591         *cam++ = *ba++; bj++;
4592       }
4593       /* diagonal portion of A */
4594       ncols_d = ai[i+1] - ai[i];
4595       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4596       /* off-diagonal portion of A */
4597       for (j=jo; j<ncols_o; j++) {
4598         *cam++ = *ba++; bj++;
4599       }
4600     }
4601   } else {
4602     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4603   }
4604 
4605   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4606   PetscFunctionReturn(0);
4607 }
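
/*
   Usage sketch for MatGetLocalMat() (illustrative): extract the local rows of an
   MPIAIJ matrix A, with global column indices, as a SeqAIJ matrix.

     Mat A_loc;

     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... after the values of A change (same nonzero pattern), refresh with ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/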
4608 
4609 #undef __FUNCT__
4610 #define __FUNCT__ "MatGetLocalMatCondensed"
4611 /*@C
4612      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and the NON-ZERO columns of a parallel AIJ matrix
4613 
4614     Not Collective
4615 
4616    Input Parameters:
4617 +    A - the matrix
4618 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4619 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4620 
4621    Output Parameter:
4622 .    A_loc - the local sequential matrix generated
4623 
4624     Level: developer
4625 
4626 @*/
4627 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4628 {
4629   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4630   PetscErrorCode    ierr;
4631   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4632   IS                isrowa,iscola;
4633   Mat               *aloc;
4634 
4635   PetscFunctionBegin;
4636   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4637   if (!row){
4638     start = A->rmap->rstart; end = A->rmap->rend;
4639     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4640   } else {
4641     isrowa = *row;
4642   }
4643   if (!col){
4644     start = A->cmap->rstart;
4645     cmap  = a->garray;
4646     nzA   = a->A->cmap->n;
4647     nzB   = a->B->cmap->n;
4648     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4649     ncols = 0;
4650     for (i=0; i<nzB; i++) {
4651       if (cmap[i] < start) idx[ncols++] = cmap[i];
4652       else break;
4653     }
4654     imark = i;
4655     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4656     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4657     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
4658   } else {
4659     iscola = *col;
4660   }
4661   if (scall != MAT_INITIAL_MATRIX){
4662     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4663     aloc[0] = *A_loc;
4664   }
4665   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4666   *A_loc = aloc[0];
4667   ierr = PetscFree(aloc);CHKERRQ(ierr);
4668   if (!row){
4669     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4670   }
4671   if (!col){
4672     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4673   }
4674   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4675   PetscFunctionReturn(0);
4676 }
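
/*
   Usage sketch for MatGetLocalMatCondensed() (illustrative): passing PETSC_NULL for
   row and col extracts all local rows and the nonzero columns, using the default
   index sets built above.

     Mat A_loc;

     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/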
4677 
4678 #undef __FUNCT__
4679 #define __FUNCT__ "MatGetBrowsOfAcols"
4680 /*@C
4681     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4682 
4683     Collective on Mat
4684 
4685    Input Parameters:
4686 +    A,B - the matrices in mpiaij format
4687 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4688 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4689 
4690    Output Parameter:
4691 +    rowb, colb - index sets of rows and columns of B to extract
4692 .    brstart - row index of B_seq at which the next B->rmap->n rows, taken from B's own local rows, begin
4693 -    B_seq - the sequential matrix generated
4694 
4695     Level: developer
4696 
4697 @*/
4698 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4699 {
4700   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4701   PetscErrorCode    ierr;
4702   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4703   IS                isrowb,iscolb;
4704   Mat               *bseq;
4705 
4706   PetscFunctionBegin;
4707   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4708     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4709   }
4710   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4711 
4712   if (scall == MAT_INITIAL_MATRIX){
4713     start = A->cmap->rstart;
4714     cmap  = a->garray;
4715     nzA   = a->A->cmap->n;
4716     nzB   = a->B->cmap->n;
4717     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4718     ncols = 0;
4719     for (i=0; i<nzB; i++) {  /* row < local row index */
4720       if (cmap[i] < start) idx[ncols++] = cmap[i];
4721       else break;
4722     }
4723     imark = i;
4724     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4725     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4726     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
4727     *brstart = imark;
4728     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
4729   } else {
4730     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4731     isrowb = *rowb; iscolb = *colb;
4732     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4733     bseq[0] = *B_seq;
4734   }
4735   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4736   *B_seq = bseq[0];
4737   ierr = PetscFree(bseq);CHKERRQ(ierr);
4738   if (!rowb){
4739     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4740   } else {
4741     *rowb = isrowb;
4742   }
4743   if (!colb){
4744     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4745   } else {
4746     *colb = iscolb;
4747   }
4748   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4749   PetscFunctionReturn(0);
4750 }
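
/*
   Usage sketch for MatGetBrowsOfAcols() (illustrative): the index sets returned by
   the first call must be passed back for MAT_REUSE_MATRIX, as the error check above
   requires.

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... after the values of B change (same nonzero pattern) ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/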
4751 
4752 #undef __FUNCT__
4753 #define __FUNCT__ "MatGetBrowsOfAoCols"
4754 /*@C
4755     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero
4756     columns of the OFF-DIAGONAL portion of the local part of A
4757 
4758     Collective on Mat
4759 
4760    Input Parameters:
4761 +    A,B - the matrices in mpiaij format
4762 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4763 .    startsj - starting points in B's sending j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4764 .    startsj_r - starting points in B's receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4765 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4766 
4767    Output Parameter:
4768 .    B_oth - the sequential matrix generated
4769 
4770     Level: developer
4771 
4772 @*/
4773 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
4774 {
4775   VecScatter_MPI_General *gen_to,*gen_from;
4776   PetscErrorCode         ierr;
4777   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4778   Mat_SeqAIJ             *b_oth;
4779   VecScatter             ctx=a->Mvctx;
4780   MPI_Comm               comm=((PetscObject)ctx)->comm;
4781   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4782   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
4783   PetscScalar            *rvalues,*svalues;
4784   MatScalar              *b_otha,*bufa,*bufA;
4785   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4786   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4787   MPI_Status             *sstatus,rstatus;
4788   PetscMPIInt            jj;
4789   PetscInt               *cols,sbs,rbs;
4790   PetscScalar            *vals;
4791 
4792   PetscFunctionBegin;
4793   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend){
4794     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
4795   }
4796   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4797   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4798 
4799   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4800   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4801   rvalues  = gen_from->values; /* holds the length of receiving row */
4802   svalues  = gen_to->values;   /* holds the length of sending row */
4803   nrecvs   = gen_from->n;
4804   nsends   = gen_to->n;
4805 
4806   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4807   srow     = gen_to->indices;   /* local row index to be sent */
4808   sstarts  = gen_to->starts;
4809   sprocs   = gen_to->procs;
4810   sstatus  = gen_to->sstatus;
4811   sbs      = gen_to->bs;
4812   rstarts  = gen_from->starts;
4813   rprocs   = gen_from->procs;
4814   rbs      = gen_from->bs;
4815 
4816   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4817   if (scall == MAT_INITIAL_MATRIX){
4818     /* i-array */
4819     /*---------*/
4820     /*  post receives */
4821     for (i=0; i<nrecvs; i++){
4822       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4823       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4824       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4825     }
4826 
4827     /* pack the outgoing message */
4828     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
4829     sstartsj[0] = 0;  rstartsj[0] = 0;
4830     len = 0; /* total length of j or a array to be sent */
4831     k = 0;
4832     for (i=0; i<nsends; i++){
4833       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4834       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4835       for (j=0; j<nrows; j++) {
4836         row = srow[k] + B->rmap->range[rank]; /* global row idx */
4837         for (l=0; l<sbs; l++){
4838           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4839           rowlen[j*sbs+l] = ncols;
4840           len += ncols;
4841           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4842         }
4843         k++;
4844       }
4845       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4846       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4847     }
4848     /* recvs and sends of i-array are completed */
4849     i = nrecvs;
4850     while (i--) {
4851       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4852     }
4853     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4854 
4855     /* allocate buffers for sending j and a arrays */
4856     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4857     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4858 
4859     /* create i-array of B_oth */
4860     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4861     b_othi[0] = 0;
4862     len = 0; /* total length of j or a array to be received */
4863     k = 0;
4864     for (i=0; i<nrecvs; i++){
4865       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4866       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4867       for (j=0; j<nrows; j++) {
4868         b_othi[k+1] = b_othi[k] + rowlen[j];
4869         len += rowlen[j]; k++;
4870       }
4871       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4872     }
4873 
4874     /* allocate space for j and a arrays of B_oth */
4875     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4876     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
4877 
4878     /* j-array */
4879     /*---------*/
4880     /*  post receives of j-array */
4881     for (i=0; i<nrecvs; i++){
4882       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4883       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4884     }
4885 
4886     /* pack the outgoing message j-array */
4887     k = 0;
4888     for (i=0; i<nsends; i++){
4889       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4890       bufJ = bufj+sstartsj[i];
4891       for (j=0; j<nrows; j++) {
4892         row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4893         for (ll=0; ll<sbs; ll++){
4894           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4895           for (l=0; l<ncols; l++){
4896             *bufJ++ = cols[l];
4897           }
4898           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4899         }
4900       }
4901       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4902     }
4903 
4904     /* recvs and sends of j-array are completed */
4905     i = nrecvs;
4906     while (i--) {
4907       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4908     }
4909     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4910   } else if (scall == MAT_REUSE_MATRIX){
4911     sstartsj = *startsj;
4912     rstartsj = *startsj_r;
4913     bufa     = *bufa_ptr;
4914     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4915     b_otha   = b_oth->a;
4916   } else {
4917     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4918   }
4919 
4920   /* a-array */
4921   /*---------*/
4922   /*  post receives of a-array */
4923   for (i=0; i<nrecvs; i++){
4924     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4925     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4926   }
4927 
4928   /* pack the outgoing message a-array */
4929   k = 0;
4930   for (i=0; i<nsends; i++){
4931     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4932     bufA = bufa+sstartsj[i];
4933     for (j=0; j<nrows; j++) {
4934       row  = srow[k++] + B->rmap->range[rank]; /* global row idx */
4935       for (ll=0; ll<sbs; ll++){
4936         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4937         for (l=0; l<ncols; l++){
4938           *bufA++ = vals[l];
4939         }
4940         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4941       }
4942     }
4943     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4944   }
4945   /* recvs and sends of a-array are completed */
4946   i = nrecvs;
4947   while (i--) {
4948     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4949   }
4950   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4951   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4952 
4953   if (scall == MAT_INITIAL_MATRIX){
4954     /* put together the new matrix */
4955     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4956 
4957     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4958     /* Since these are PETSc arrays, change flags to free them as necessary. */
4959     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4960     b_oth->free_a  = PETSC_TRUE;
4961     b_oth->free_ij = PETSC_TRUE;
4962     b_oth->nonew   = 0;
4963 
4964     ierr = PetscFree(bufj);CHKERRQ(ierr);
4965     if (!startsj || !bufa_ptr){
4966       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
4967       ierr = PetscFree(bufa);CHKERRQ(ierr);
4968     } else {
4969       *startsj   = sstartsj;
4970       *startsj_r = rstartsj;
4971       *bufa_ptr  = bufa;
4972     }
4973   }
4974   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4975   PetscFunctionReturn(0);
4976 }
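
/*
   Usage sketch for MatGetBrowsOfAoCols() (illustrative): the communication offsets
   and the send buffer from the first call are handed back for MAT_REUSE_MATRIX;
   passing PETSC_NULL for them forces MAT_INITIAL_MATRIX behavior, as coded above.
   The caller eventually frees the saved buffers.

     PetscInt  *startsj = PETSC_NULL,*startsj_r = PETSC_NULL;
     MatScalar *bufa = PETSC_NULL;
     Mat       B_oth;

     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ... after the values of B change (same nonzero pattern) ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
*/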
4977 
4978 #undef __FUNCT__
4979 #define __FUNCT__ "MatGetCommunicationStructs"
4980 /*@C
4981   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4982 
4983   Not Collective
4984 
4985   Input Parameters:
4986 . A - The matrix in mpiaij format
4987 
4988   Output Parameter:
4989 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4990 . colmap - A map from global column index to local index into lvec
4991 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4992 
4993   Level: developer
4994 
4995 @*/
4996 #if defined (PETSC_USE_CTABLE)
4997 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4998 #else
4999 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5000 #endif
5001 {
5002   Mat_MPIAIJ *a;
5003 
5004   PetscFunctionBegin;
5005   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5006   PetscValidPointer(lvec, 2);
5007   PetscValidPointer(colmap, 3);
5008   PetscValidPointer(multScatter, 4);
5009   a = (Mat_MPIAIJ *) A->data;
5010   if (lvec) *lvec = a->lvec;
5011   if (colmap) *colmap = a->colmap;
5012   if (multScatter) *multScatter = a->Mvctx;
5013   PetscFunctionReturn(0);
5014 }
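
/*
   Usage sketch for MatGetCommunicationStructs() (illustrative); note that the type
   of colmap depends on whether PETSc was configured with PETSC_USE_CTABLE:

     Vec        lvec;
     VecScatter scatter;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scatter);CHKERRQ(ierr);
*/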
5015 
5016 EXTERN_C_BEGIN
5017 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJCRL(Mat,const MatType,MatReuse,Mat*);
5018 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPIAIJPERM(Mat,const MatType,MatReuse,Mat*);
5019 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
5020 EXTERN_C_END
5021 
5022 #undef __FUNCT__
5023 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5024 /*
5025     Computes C = A*B as (B'*A')' since forming the dense*sparse product A*B directly is not supported
5026 
5027                n                       p                          p
5028         (              )       (              )         (                  )
5029       m (      A       )  *  n (       B      )   =   m (         C        )
5030         (              )       (              )         (                  )
5031 
5032 */
5033 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5034 {
5035   PetscErrorCode     ierr;
5036   Mat                At,Bt,Ct;
5037 
5038   PetscFunctionBegin;
5039   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5040   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5041   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5042   ierr = MatDestroy(At);CHKERRQ(ierr);
5043   ierr = MatDestroy(Bt);CHKERRQ(ierr);
5044   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5045   ierr = MatDestroy(Ct);CHKERRQ(ierr);
5046   PetscFunctionReturn(0);
5047 }
5048 
5049 #undef __FUNCT__
5050 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5051 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5052 {
5053   PetscErrorCode ierr;
5054   PetscInt       m=A->rmap->n,n=B->cmap->n;
5055   Mat            Cmat;
5056 
5057   PetscFunctionBegin;
5058   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5059   ierr = MatCreate(((PetscObject)A)->comm,&Cmat);CHKERRQ(ierr);
5060   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5061   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5062   ierr = MatMPIDenseSetPreallocation(Cmat,PETSC_NULL);CHKERRQ(ierr);
5063   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5064   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5065   *C   = Cmat;
5066   PetscFunctionReturn(0);
5067 }
5068 
5069 /* ----------------------------------------------------------------*/
5070 #undef __FUNCT__
5071 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5072 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5073 {
5074   PetscErrorCode ierr;
5075 
5076   PetscFunctionBegin;
5077   if (scall == MAT_INITIAL_MATRIX){
5078     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5079   }
5080   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5081   PetscFunctionReturn(0);
5082 }
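
/*
   Usage sketch (illustrative): users normally reach the routines above through the
   generic MatMatMult() interface, which dispatches here via the composed function
   "MatMatMult_mpidense_mpiaij_C" registered in MatCreate_MPIAIJ() below.

     Mat C;

     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ... after the values of A or B change (same nonzero pattern) ...
     ierr = MatMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
*/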
5083 
5084 EXTERN_C_BEGIN
5085 #if defined(PETSC_HAVE_MUMPS)
5086 extern PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5087 #endif
5088 #if defined(PETSC_HAVE_PASTIX)
5089 extern PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5090 #endif
5091 #if defined(PETSC_HAVE_SUPERLU_DIST)
5092 extern PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5093 #endif
5094 #if defined(PETSC_HAVE_SPOOLES)
5095 extern PetscErrorCode MatGetFactor_mpiaij_spooles(Mat,MatFactorType,Mat*);
5096 #endif
5097 EXTERN_C_END
5098 
5099 /*MC
5100    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5101 
5102    Options Database Keys:
5103 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5104 
5105   Level: beginner
5106 
5107 .seealso: MatCreateMPIAIJ()
5108 M*/
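
/*
   A minimal creation sketch (illustrative): MatSetFromOptions() lets the option
   -mat_type mpiaij select this type at runtime.

     Mat A;

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);    run with -mat_type mpiaij
*/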
5109 
5110 EXTERN_C_BEGIN
5111 #undef __FUNCT__
5112 #define __FUNCT__ "MatCreate_MPIAIJ"
5113 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
5114 {
5115   Mat_MPIAIJ     *b;
5116   PetscErrorCode ierr;
5117   PetscMPIInt    size;
5118 
5119   PetscFunctionBegin;
5120   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
5121 
5122   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5123   B->data         = (void*)b;
5124   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5125   B->rmap->bs     = 1;
5126   B->assembled    = PETSC_FALSE;
5127   B->mapping      = 0;
5128 
5129   B->insertmode   = NOT_SET_VALUES;
5130   b->size         = size;
5131   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
5132 
5133   /* build cache for off array entries formed */
5134   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
5135   b->donotstash  = PETSC_FALSE;
5136   b->colmap      = 0;
5137   b->garray      = 0;
5138   b->roworiented = PETSC_TRUE;
5139 
5140   /* stuff used for matrix vector multiply */
5141   b->lvec      = PETSC_NULL;
5142   b->Mvctx     = PETSC_NULL;
5143 
5144   /* stuff for MatGetRow() */
5145   b->rowindices   = 0;
5146   b->rowvalues    = 0;
5147   b->getrowactive = PETSC_FALSE;
5148 
5149 #if defined(PETSC_HAVE_SPOOLES)
5150   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
5151                                      "MatGetFactor_mpiaij_spooles",
5152                                      MatGetFactor_mpiaij_spooles);CHKERRQ(ierr);
5153 #endif
5154 #if defined(PETSC_HAVE_MUMPS)
5155   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
5156                                      "MatGetFactor_aij_mumps",
5157                                      MatGetFactor_aij_mumps);CHKERRQ(ierr);
5158 #endif
5159 #if defined(PETSC_HAVE_PASTIX)
5160   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
5161 					   "MatGetFactor_mpiaij_pastix",
5162 					   MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5163 #endif
5164 #if defined(PETSC_HAVE_SUPERLU_DIST)
5165   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_superlu_dist_C",
5166                                      "MatGetFactor_mpiaij_superlu_dist",
5167                                      MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5168 #endif
5169   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
5170                                      "MatStoreValues_MPIAIJ",
5171                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5172   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
5173                                      "MatRetrieveValues_MPIAIJ",
5174                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5175   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
5176 				     "MatGetDiagonalBlock_MPIAIJ",
5177                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5178   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
5179 				     "MatIsTranspose_MPIAIJ",
5180 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5181   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
5182 				     "MatMPIAIJSetPreallocation_MPIAIJ",
5183 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5184   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
5185 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
5186 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5187   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
5188 				     "MatDiagonalScaleLocal_MPIAIJ",
5189 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5190   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",
5191                                      "MatConvert_MPIAIJ_MPIAIJPERM",
5192                                       MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5193   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",
5194                                      "MatConvert_MPIAIJ_MPIAIJCRL",
5195                                       MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5196   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",
5197                                      "MatConvert_MPIAIJ_MPISBAIJ",
5198                                       MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5199   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",
5200                                      "MatMatMult_MPIDense_MPIAIJ",
5201                                       MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5202   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",
5203                                      "MatMatMultSymbolic_MPIDense_MPIAIJ",
5204                                      MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5205   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",
5206                                      "MatMatMultNumeric_MPIDense_MPIAIJ",
5207                                       MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5208   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5209   PetscFunctionReturn(0);
5210 }
5211 EXTERN_C_END
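
/*
   The composed functions registered above are reached with PetscObjectQueryFunction();
   a sketch (the factor type and the variable F are illustrative):

     PetscErrorCode (*f)(Mat,MatFactorType,Mat*) = PETSC_NULL;
     Mat            F;

     ierr = PetscObjectQueryFunction((PetscObject)B,"MatGetFactor_mumps_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) {ierr = (*f)(B,MAT_FACTOR_LU,&F);CHKERRQ(ierr);}
*/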
5212 
5213 #undef __FUNCT__
5214 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5215 /*@
5216      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5217          and "off-diagonal" part of the matrix in CSR format.
5218 
5219    Collective on MPI_Comm
5220 
5221    Input Parameters:
5222 +  comm - MPI communicator
5223 .  m - number of local rows (Cannot be PETSC_DECIDE)
5224 .  n - This value should be the same as the local size used in creating the
5225        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5226        calculated if N is given). For square matrices n is almost always m.
5227 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5228 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5229 .   i - row indices for "diagonal" portion of matrix
5230 .   j - column indices
5231 .   a - matrix values
5232 .   oi - row indices for "off-diagonal" portion of matrix
5233 .   oj - column indices
5234 -   oa - matrix values
5235 
5236    Output Parameter:
5237 .   mat - the matrix
5238 
5239    Level: advanced
5240 
5241    Notes:
5242        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
5243 
5244        The i and j indices are 0 based
5245 
5246        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5247 
5248        This sets local rows and cannot be used to set off-processor values.
5249 
5250        You cannot later use MatSetValues() to change values in this matrix.
5251 
5252 .keywords: matrix, aij, compressed row, sparse, parallel
5253 
5254 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5255           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
5256 @*/
5257 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
5258 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5259 {
5260   PetscErrorCode ierr;
5261   Mat_MPIAIJ     *maij;
5262 
5263   PetscFunctionBegin;
5264   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5265   if (i[0]) {
5266     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5267   }
5268   if (oi[0]) {
5269     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5270   }
5271   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5272   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5273   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5274   maij = (Mat_MPIAIJ*) (*mat)->data;
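  /* the caller's arrays already describe the final local rows, so values are never
     stashed for other processes and no preallocation pass is needed */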
  maij->donotstash     = PETSC_TRUE;
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetBlockSize((*mat)->rmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize((*mat)->cmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
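
  /* A holds the "diagonal" block with local column indices (width n); B holds the
     "off-diagonal" entries with global column indices (width N) until assembly
     compacts its column space */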
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
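/* the Fortran entry point below returns void, so errors cannot be reported through
   a return code; the redefined macros abort on the communicator instead */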

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat             mat = *mmat;
  PetscInt        m = *mm, n = *mn;
  InsertMode      addv = *maddv;
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar     value;
  PetscErrorCode  ierr;

  ierr = MatPreallocated(mat);CHKERRQ(ierr);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt        i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt        cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool       roworiented = aij->roworiented;

  /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
  Mat             A = aij->A;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ*)A->data;
  PetscInt        *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar       *aa = a->a;
  PetscBool       ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat             B = aij->B;
  Mat_SeqAIJ      *b = (Mat_SeqAIJ*)B->data;
  PetscInt        *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar       *ba = b->a;

  PetscInt        *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt        nonew = a->nonew;
  MatScalar       *ap1,*ap2;

  PetscFunctionBegin;
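  /* loop over the rows to be set: locally owned rows are inserted directly into
     A or B below, off-process rows are buffered in the stash */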
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;
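
      /* columns inside the ownership range [cstart,cend) go into the diagonal
         block A (with local indices); all other columns go into the
         off-diagonal block B */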
      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
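          /* off-diagonal entry: once the matrix has been assembled, B uses a
             compacted column space, so translate the global column through colmap;
             a miss means a new off-diagonal location, which forces a disassemble */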
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                 ba must be refreshed from the new B before computing ap2 */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
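      /* this row belongs to another process: buffer the values in the stash so
         they can be communicated during MatAssemblyBegin()/MatAssemblyEnd() */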
      if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END