xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a6570f20d5bf426f46356aeda2a42fd24720f376)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it, it is not scalable (each processor
11 has an order-N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
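/*
   Illustrative sketch (not part of the library source): how the colmap built above is
   consulted later in this file when a global column index gcol must be translated into
   a local column index of the off-diagonal block B; the +1/-1 shift lets a stored value
   of 0 (and hence a returned index of -1) mean "column not present":

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/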
36 
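/*
   The two macros below insert (row,col,value) into the local diagonal block A or the
   local off-diagonal block B, both stored in SeqAIJ format: a short bisection narrows
   the column range, a linear scan locates the column, the value is added to or
   overwrites an existing entry, otherwise it is inserted in sorted order; when the row
   is full MatSeqXAIJReallocateAIJ() grows the storage (CHUNKSIZE controls how much
   extra room is requested).
*/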
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find the number of entries in this row that lie to the left of the diagonal block */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
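/*
   Illustrative sketch (hypothetical names mat, grow, vrow; assumes the matrix is
   assembled and grow is a locally owned global row with exactly three stored
   nonzeros).  The values handed to MatSetValuesRow() must cover every existing
   nonzero of the row, ordered as they are copied above: off-diagonal entries left
   of the diagonal block, then the diagonal-block entries, then the remaining
   off-diagonal entries.

     PetscScalar vrow[3] = {1.0,2.0,3.0};
     ierr = MatSetValuesRow(mat,grow,vrow);CHKERRQ(ierr);
*/
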
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some variables required by the macros above */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
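/*
   Minimal usage sketch (illustrative; mat is assumed to be a preallocated MATMPIAIJ
   matrix and the indices are hypothetical).  Locally owned entries go directly into
   the A or B block above, while entries destined for rows owned by other processes
   are stashed and communicated during MatAssemblyBegin/End():

     PetscInt    row = 0,cols[2] = {0,7};
     PetscScalar vals[2] = {2.0,-1.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
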
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatGetValues_MPIAIJ"
241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
242 {
243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
244   PetscErrorCode ierr;
245   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
246   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
247 
248   PetscFunctionBegin;
249   for (i=0; i<m; i++) {
250     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
251     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
252     if (idxm[i] >= rstart && idxm[i] < rend) {
253       row = idxm[i] - rstart;
254       for (j=0; j<n; j++) {
255         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
256         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
257         if (idxn[j] >= cstart && idxn[j] < cend){
258           col = idxn[j] - cstart;
259           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
260         } else {
261           if (!aij->colmap) {
262             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
263           }
264 #if defined (PETSC_USE_CTABLE)
265           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
266           col --;
267 #else
268           col = aij->colmap[idxn[j]] - 1;
269 #endif
270           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
271           else {
272             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
273           }
274         }
275       }
276     } else {
277       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
278     }
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   if (aij->donotstash) {
294     PetscFunctionReturn(0);
295   }
296 
297   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
298   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
299   if (addv == (ADD_VALUES|INSERT_VALUES)) {
300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
301   }
302   mat->insertmode = addv; /* in case this processor had no cache */
303 
304   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
305   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
306   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
313 {
314   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
315   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
316   PetscErrorCode ierr;
317   PetscMPIInt    n;
318   PetscInt       i,j,rstart,ncols,flg;
319   PetscInt       *row,*col,other_disassembled;
320   PetscScalar    *val;
321   InsertMode     addv = mat->insertmode;
322 
323   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324   PetscFunctionBegin;
325   if (!aij->donotstash) {
326     while (1) {
327       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
328       if (!flg) break;
329 
330       for (i=0; i<n;) {
331         /* Now identify the consecutive vals belonging to the same row */
332         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
333         if (j < n) ncols = j-i;
334         else       ncols = n-i;
335         /* Now assemble all these values with a single function call */
336         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
337         i = j;
338       }
339     }
340     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
341   }
342   a->compressedrow.use     = PETSC_FALSE;
343   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
345 
346   /* determine if any processor has disassembled; if so, we must
347      also disassemble ourselves so that we may reassemble. */
348   /*
349      if the nonzero structure of submatrix B cannot change then we know that
350      no processor disassembled, thus we can skip this step
351   */
352   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
353     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
354     if (mat->was_assembled && !other_disassembled) {
355       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
356     }
357   }
358   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
359     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
360   }
361   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
362   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
363   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
364   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
365 
366   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
367   aij->rowvalues = 0;
368 
369   /* used by MatAXPY() */
370   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
371   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
372 
373   PetscFunctionReturn(0);
374 }
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
379 {
380   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
381   PetscErrorCode ierr;
382 
383   PetscFunctionBegin;
384   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
385   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatZeroRows_MPIAIJ"
391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
392 {
393   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
394   PetscErrorCode ierr;
395   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
396   PetscInt       i,*owners = A->rmap.range;
397   PetscInt       *nprocs,j,idx,nsends,row;
398   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
399   PetscInt       *rvalues,count,base,slen,*source;
400   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
401   MPI_Comm       comm = A->comm;
402   MPI_Request    *send_waits,*recv_waits;
403   MPI_Status     recv_status,*send_status;
404 #if defined(PETSC_USE_DEBUG)
405   PetscTruth     found = PETSC_FALSE;
406 #endif
407 
408   PetscFunctionBegin;
409   /*  first count number of contributors to each processor */
410   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
411   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
412   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
413   j = 0;
414   for (i=0; i<N; i++) {
415     if (lastidx > (idx = rows[i])) j = 0;
416     lastidx = idx;
417     for (; j<size; j++) {
418       if (idx >= owners[j] && idx < owners[j+1]) {
419         nprocs[2*j]++;
420         nprocs[2*j+1] = 1;
421         owner[i] = j;
422 #if defined(PETSC_USE_DEBUG)
423         found = PETSC_TRUE;
424 #endif
425         break;
426       }
427     }
428 #if defined(PETSC_USE_DEBUG)
429     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
430     found = PETSC_FALSE;
431 #endif
432   }
433   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434 
435   /* inform other processors of the number of messages and the max message length */
436   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
437 
438   /* post receives:   */
439   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
440   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
441   for (i=0; i<nrecvs; i++) {
442     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
443   }
444 
445   /* do sends:
446       1) starts[i] gives the starting index in svalues for stuff going to
447          the ith processor
448   */
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
450   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
451   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
452   starts[0] = 0;
453   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
454   for (i=0; i<N; i++) {
455     svalues[starts[owner[i]]++] = rows[i];
456   }
457 
458   starts[0] = 0;
459   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
460   count = 0;
461   for (i=0; i<size; i++) {
462     if (nprocs[2*i+1]) {
463       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
464     }
465   }
466   ierr = PetscFree(starts);CHKERRQ(ierr);
467 
468   base = owners[rank];
469 
470   /*  wait on receives */
471   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
472   source = lens + nrecvs;
473   count  = nrecvs; slen = 0;
474   while (count) {
475     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
476     /* unpack receives into our local space */
477     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
478     source[imdex]  = recv_status.MPI_SOURCE;
479     lens[imdex]    = n;
480     slen          += n;
481     count--;
482   }
483   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
484 
485   /* move the received row indices into the list of local rows to zero */
486   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
487   count = 0;
488   for (i=0; i<nrecvs; i++) {
489     values = rvalues + i*nmax;
490     for (j=0; j<lens[i]; j++) {
491       lrows[count++] = values[j] - base;
492     }
493   }
494   ierr = PetscFree(rvalues);CHKERRQ(ierr);
495   ierr = PetscFree(lens);CHKERRQ(ierr);
496   ierr = PetscFree(owner);CHKERRQ(ierr);
497   ierr = PetscFree(nprocs);CHKERRQ(ierr);
498 
499   /* actually zap the local rows */
500   /*
501         Zero the required rows. If the "diagonal block" of the matrix
502      is square and the user wishes to set the diagonal we use separate
503      code so that MatSetValues() is not called for each diagonal allocating
504      new memory, thus calling lots of mallocs and slowing things down.
505 
506        Contributed by: Matthew Knepley
507   */
508   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
509   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
510   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
511     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
512   } else if (diag != 0.0) {
513     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
514     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
515       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
517     }
518     for (i = 0; i < slen; i++) {
519       row  = lrows[i] + rstart;
520       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
521     }
522     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
524   } else {
525     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
526   }
527   ierr = PetscFree(lrows);CHKERRQ(ierr);
528 
529   /* wait on sends */
530   if (nsends) {
531     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
532     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
533     ierr = PetscFree(send_status);CHKERRQ(ierr);
534   }
535   ierr = PetscFree(send_waits);CHKERRQ(ierr);
536   ierr = PetscFree(svalues);CHKERRQ(ierr);
537 
538   PetscFunctionReturn(0);
539 }
540 
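/*
   Usage sketch (illustrative; the row indices are hypothetical): any process may list
   rows it does not own, since the indices are routed to their owners above before the
   local rows are zeroed; a nonzero diag places that value on the diagonal of each
   zeroed row.

     PetscInt rows[2] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);
*/
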
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMult_MPIAIJ"
543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
544 {
545   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
546   PetscErrorCode ierr;
547   PetscInt       nt;
548 
549   PetscFunctionBegin;
550   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
551   if (nt != A->cmap.n) {
552     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
553   }
554   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
555   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
556   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
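/*
   Note on the multiply above: communication is overlapped with computation.  The
   off-process (ghost) values of xx are gathered into a->lvec while the local product
   yy = (diagonal block A) * xx is computed; the scatter is then completed and
   yy += (off-diagonal block B) * a->lvec adds the off-process contributions.
*/
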
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultAdd_MPIAIJ"
563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567 
568   PetscFunctionBegin;
569   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
570   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
571   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
572   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
579 {
580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
581   PetscErrorCode ierr;
582   PetscTruth     merged;
583 
584   PetscFunctionBegin;
585   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
586   /* do nondiagonal part */
587   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
588   if (!merged) {
589     /* send it on its way */
590     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
591     /* do local part */
592     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
593     /* receive remote parts: note this assumes the values are not actually */
594     /* added into yy until the VecScatterEnd() on the next line */
595     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
596   } else {
597     /* do local part */
598     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
599     /* send it on its way */
600     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
601     /* the values were actually received in the Begin() but we still need to call this no-op */
602     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
611 {
612   MPI_Comm       comm;
613   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
614   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
615   IS             Me,Notme;
616   PetscErrorCode ierr;
617   PetscInt       M,N,first,last,*notme,i;
618   PetscMPIInt    size;
619 
620   PetscFunctionBegin;
621 
622   /* Easy test: check that the diagonal blocks are transposes of each other */
623   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
624   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
625   if (!*f) PetscFunctionReturn(0);
626   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
628   if (size == 1) PetscFunctionReturn(0);
629 
630   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
631   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
632   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
633   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
634   for (i=0; i<first; i++) notme[i] = i;
635   for (i=last; i<M; i++) notme[i-last+first] = i;
636   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
637   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
638   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
639   Aoff = Aoffs[0];
640   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
641   Boff = Boffs[0];
642   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
644   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
645   ierr = ISDestroy(Me);CHKERRQ(ierr);
646   ierr = ISDestroy(Notme);CHKERRQ(ierr);
647 
648   PetscFunctionReturn(0);
649 }
650 EXTERN_C_END
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
655 {
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657   PetscErrorCode ierr;
658 
659   PetscFunctionBegin;
660   /* do nondiagonal part */
661   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
662   /* send it on its way */
663   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
664   /* do local part */
665   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
666   /* receive remote parts */
667   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672   This only works correctly for square matrices where the subblock A->A is the
673    diagonal block
674 */
675 #undef __FUNCT__
676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
678 {
679   PetscErrorCode ierr;
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681 
682   PetscFunctionBegin;
683   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
684   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
685     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
686   }
687   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatScale_MPIAIJ"
693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
694 {
695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
700   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatDestroy_MPIAIJ"
706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
707 {
708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
709   PetscErrorCode ierr;
710 
711   PetscFunctionBegin;
712 #if defined(PETSC_USE_LOG)
713   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
714 #endif
715   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
717   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
718 #if defined (PETSC_USE_CTABLE)
719   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
720 #else
721   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
722 #endif
723   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
724   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
725   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
726   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatView_MPIAIJ_Binary"
741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
742 {
743   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
744   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
745   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
746   PetscErrorCode    ierr;
747   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
748   int               fd;
749   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
750   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
751   PetscScalar       *column_values;
752 
753   PetscFunctionBegin;
754   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
755   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
756   nz   = A->nz + B->nz;
757   if (!rank) {
758     header[0] = MAT_FILE_COOKIE;
759     header[1] = mat->rmap.N;
760     header[2] = mat->cmap.N;
761     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
762     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
763     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
764     /* get largest number of rows any processor has */
765     rlen = mat->rmap.n;
766     range = mat->rmap.range;
767     for (i=1; i<size; i++) {
768       rlen = PetscMax(rlen,range[i+1] - range[i]);
769     }
770   } else {
771     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
772     rlen = mat->rmap.n;
773   }
774 
775   /* load up the local row counts */
776   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
777   for (i=0; i<mat->rmap.n; i++) {
778     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
779   }
780 
781   /* store the row lengths to the file */
782   if (!rank) {
783     MPI_Status status;
784     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
785     for (i=1; i<size; i++) {
786       rlen = range[i+1] - range[i];
787       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
788       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
789     }
790   } else {
791     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
792   }
793   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
794 
795   /* load up the local column indices */
796   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
797   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
798   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
799   cnt  = 0;
800   for (i=0; i<mat->rmap.n; i++) {
801     for (j=B->i[i]; j<B->i[i+1]; j++) {
802       if ( (col = garray[B->j[j]]) > cstart) break;
803       column_indices[cnt++] = col;
804     }
805     for (k=A->i[i]; k<A->i[i+1]; k++) {
806       column_indices[cnt++] = A->j[k] + cstart;
807     }
808     for (; j<B->i[i+1]; j++) {
809       column_indices[cnt++] = garray[B->j[j]];
810     }
811   }
812   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
813 
814   /* store the column indices to the file */
815   if (!rank) {
816     MPI_Status status;
817     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
818     for (i=1; i<size; i++) {
819       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
820       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
821       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
823     }
824   } else {
825     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
826     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827   }
828   ierr = PetscFree(column_indices);CHKERRQ(ierr);
829 
830   /* load up the local column values */
831   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
832   cnt  = 0;
833   for (i=0; i<mat->rmap.n; i++) {
834     for (j=B->i[i]; j<B->i[i+1]; j++) {
835       if ( garray[B->j[j]] > cstart) break;
836       column_values[cnt++] = B->a[j];
837     }
838     for (k=A->i[i]; k<A->i[i+1]; k++) {
839       column_values[cnt++] = A->a[k];
840     }
841     for (; j<B->i[i+1]; j++) {
842       column_values[cnt++] = B->a[j];
843     }
844   }
845   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
846 
847   /* store the column values to the file */
848   if (!rank) {
849     MPI_Status status;
850     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
851     for (i=1; i<size; i++) {
852       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
853       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
854       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
856     }
857   } else {
858     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
859     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
860   }
861   ierr = PetscFree(column_values);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
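/*
   Illustrative sketch (the file name is arbitrary): the routine above is normally
   reached through the generic viewer interface, e.g.

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(mat->comm,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/
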
865 #undef __FUNCT__
866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
868 {
869   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
870   PetscErrorCode    ierr;
871   PetscMPIInt       rank = aij->rank,size = aij->size;
872   PetscTruth        isdraw,iascii,isbinary;
873   PetscViewer       sviewer;
874   PetscViewerFormat format;
875 
876   PetscFunctionBegin;
877   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
880   if (iascii) {
881     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
882     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
883       MatInfo    info;
884       PetscTruth inodes;
885 
886       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
889       if (!inodes) {
890         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
891 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
892       } else {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
894 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
895       }
896       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
897       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
898       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
900       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
901       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
902       PetscFunctionReturn(0);
903     } else if (format == PETSC_VIEWER_ASCII_INFO) {
904       PetscInt   inodecount,inodelimit,*inodes;
905       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
906       if (inodes) {
907         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
908       } else {
909         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
910       }
911       PetscFunctionReturn(0);
912     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
913       PetscFunctionReturn(0);
914     }
915   } else if (isbinary) {
916     if (size == 1) {
917       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
918       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
919     } else {
920       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
921     }
922     PetscFunctionReturn(0);
923   } else if (isdraw) {
924     PetscDraw  draw;
925     PetscTruth isnull;
926     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
927     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
928   }
929 
930   if (size == 1) {
931     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
932     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
933   } else {
934     /* assemble the entire matrix onto first processor. */
935     Mat         A;
936     Mat_SeqAIJ  *Aloc;
937     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
938     PetscScalar *a;
939 
940     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
941     if (!rank) {
942       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
943     } else {
944       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
945     }
946     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
947     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
948     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
949     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
950 
951     /* copy over the A part */
952     Aloc = (Mat_SeqAIJ*)aij->A->data;
953     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
954     row = mat->rmap.rstart;
955     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
956     for (i=0; i<m; i++) {
957       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
958       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
959     }
960     aj = Aloc->j;
961     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
962 
963     /* copy over the B part */
964     Aloc = (Mat_SeqAIJ*)aij->B->data;
965     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
966     row  = mat->rmap.rstart;
967     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
968     ct   = cols;
969     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
973     }
974     ierr = PetscFree(ct);CHKERRQ(ierr);
975     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     /*
978        Every processor has to make this call to draw the matrix since the graphics waits are
979        synchronized across all processors that share the PetscDraw object
980     */
981     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
982     if (!rank) {
983       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
984       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
985     }
986     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     ierr = MatDestroy(A);CHKERRQ(ierr);
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatView_MPIAIJ"
994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
995 {
996   PetscErrorCode ierr;
997   PetscTruth     iascii,isdraw,issocket,isbinary;
998 
999   PetscFunctionBegin;
1000   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1002   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1004   if (iascii || isdraw || isbinary || issocket) {
1005     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1006   } else {
1007     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017 {
1018   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec            bb1;
1021 
1022   PetscFunctionBegin;
1023   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1024 
1025   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1026 
1027   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1030       its--;
1031     }
1032 
1033     while (its--) {
1034       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1036 
1037       /* update rhs: bb1 = bb - B*x */
1038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1040 
1041       /* local sweep */
1042       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043       CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060       CHKERRQ(ierr);
1061     }
1062   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063     if (flag & SOR_ZERO_INITIAL_GUESS) {
1064       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1065       its--;
1066     }
1067     while (its--) {
1068       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else {
1080     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081   }
1082 
1083   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
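/*
   Illustrative sketch (hypothetical mat, b, x): two outer iterations (its = 2) of the
   block Jacobi / local SOR scheme above, with one local iteration each (lits = 1),
   omega = 1.0, and a zero initial guess:

     ierr = MatRelax(mat,b,1.0,
                     (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP|SOR_ZERO_INITIAL_GUESS),
                     0.0,2,1,x);CHKERRQ(ierr);
*/
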
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatPermute_MPIAIJ"
1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090 {
1091   MPI_Comm       comm,pcomm;
1092   PetscInt       first,local_size,nrows,*rows;
1093   int            ntids;
1094   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1099   /* make a collective version of 'rowp' */
1100   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1101   if (pcomm==comm) {
1102     crowp = rowp;
1103   } else {
1104     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1105     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1107     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1108   }
1109   /* collect the global row permutation and invert it */
1110   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1111   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1112   if (pcomm!=comm) {
1113     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1114   }
1115   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1116   /* get the local target indices */
1117   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1121   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1122   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1123   /* the column permutation is so much easier;
1124      make a local version of 'colp' and invert it */
1125   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1126   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1127   if (ntids==1) {
1128     lcolp = colp;
1129   } else {
1130     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1131     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1132     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1133   }
1134   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1135   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1136   if (ntids>1) {
1137     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1138     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1139   }
1140   /* now we just get the submatrix */
1141   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1142   /* clean up */
1143   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1144   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153   Mat            A = mat->A,B = mat->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = 1.0;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   }
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   info->rows_global       = (double)matin->rmap.N;
1190   info->columns_global    = (double)matin->cmap.N;
1191   info->rows_local        = (double)matin->rmap.n;
1192   info->columns_local     = (double)matin->cmap.N;
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatSetOption_MPIAIJ"
1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   switch (op) {
1206   case MAT_NO_NEW_NONZERO_LOCATIONS:
1207   case MAT_YES_NEW_NONZERO_LOCATIONS:
1208   case MAT_COLUMNS_UNSORTED:
1209   case MAT_COLUMNS_SORTED:
1210   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1211   case MAT_KEEP_ZEROED_ROWS:
1212   case MAT_NEW_NONZERO_LOCATION_ERR:
1213   case MAT_USE_INODES:
1214   case MAT_DO_NOT_USE_INODES:
1215   case MAT_IGNORE_ZERO_ENTRIES:
1216     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1217     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1218     break;
1219   case MAT_ROW_ORIENTED:
1220     a->roworiented = PETSC_TRUE;
1221     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1222     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1223     break;
1224   case MAT_ROWS_SORTED:
1225   case MAT_ROWS_UNSORTED:
1226   case MAT_YES_NEW_DIAGONALS:
1227     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1228     break;
1229   case MAT_COLUMN_ORIENTED:
1230     a->roworiented = PETSC_FALSE;
1231     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1232     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1233     break;
1234   case MAT_IGNORE_OFF_PROC_ENTRIES:
1235     a->donotstash = PETSC_TRUE;
1236     break;
1237   case MAT_NO_NEW_DIAGONALS:
1238     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1239   case MAT_SYMMETRIC:
1240     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1241     break;
1242   case MAT_STRUCTURALLY_SYMMETRIC:
1243   case MAT_HERMITIAN:
1244   case MAT_SYMMETRY_ETERNAL:
1245     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1246     break;
1247   case MAT_NOT_SYMMETRIC:
1248   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1249   case MAT_NOT_HERMITIAN:
1250   case MAT_NOT_SYMMETRY_ETERNAL:
1251     break;
1252   default:
1253     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatGetRow_MPIAIJ"
1260 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261 {
1262   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1263   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1264   PetscErrorCode ierr;
1265   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1266   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1267   PetscInt       *cmap,*idx_p;
1268 
1269   PetscFunctionBegin;
1270   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271   mat->getrowactive = PETSC_TRUE;
1272 
1273   if (!mat->rowvalues && (idx || v)) {
1274     /*
1275         allocate enough space to hold information from the longest row.
1276     */
1277     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1278     PetscInt     max = 1,tmp;
1279     for (i=0; i<matin->rmap.n; i++) {
1280       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1281       if (max < tmp) { max = tmp; }
1282     }
1283     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1284     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1285   }
1286 
1287   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1288   lrow = row - rstart;
1289 
1290   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291   if (!v)   {pvA = 0; pvB = 0;}
1292   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1294   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1295   nztot = nzA + nzB;
1296 
1297   cmap  = mat->garray;
1298   if (v  || idx) {
1299     if (nztot) {
1300       /* Sort by increasing column numbers, assuming A and B already sorted */
1301       PetscInt imark = -1;
1302       if (v) {
1303         *v = v_p = mat->rowvalues;
1304         for (i=0; i<nzB; i++) {
1305           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1306           else break;
1307         }
1308         imark = i;
1309         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1310         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1311       }
1312       if (idx) {
1313         *idx = idx_p = mat->rowindices;
1314         if (imark > -1) {
1315           for (i=0; i<imark; i++) {
1316             idx_p[i] = cmap[cworkB[i]];
1317           }
1318         } else {
1319           for (i=0; i<nzB; i++) {
1320             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1321             else break;
1322           }
1323           imark = i;
1324         }
1325         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1326         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1327       }
1328     } else {
1329       if (idx) *idx = 0;
1330       if (v)   *v   = 0;
1331     }
1332   }
1333   *nz = nztot;
1334   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1335   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1344 
1345   PetscFunctionBegin;
1346   if (!aij->getrowactive) {
1347     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1348   }
1349   aij->getrowactive = PETSC_FALSE;
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatNorm_MPIAIJ"
1355 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1356 {
1357   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1358   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1359   PetscErrorCode ierr;
1360   PetscInt       i,j,cstart = mat->cmap.rstart;
1361   PetscReal      sum = 0.0;
1362   PetscScalar    *v;
1363 
1364   PetscFunctionBegin;
1365   if (aij->size == 1) {
1366     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1367   } else {
1368     if (type == NORM_FROBENIUS) {
1369       v = amat->a;
1370       for (i=0; i<amat->nz; i++) {
1371 #if defined(PETSC_USE_COMPLEX)
1372         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1373 #else
1374         sum += (*v)*(*v); v++;
1375 #endif
1376       }
1377       v = bmat->a;
1378       for (i=0; i<bmat->nz; i++) {
1379 #if defined(PETSC_USE_COMPLEX)
1380         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1381 #else
1382         sum += (*v)*(*v); v++;
1383 #endif
1384       }
1385       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1386       *norm = sqrt(*norm);
1387     } else if (type == NORM_1) { /* max column norm */
1388       PetscReal *tmp,*tmp2;
1389       PetscInt    *jj,*garray = aij->garray;
1390       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1391       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1392       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1393       *norm = 0.0;
1394       v = amat->a; jj = amat->j;
1395       for (j=0; j<amat->nz; j++) {
1396         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1397       }
1398       v = bmat->a; jj = bmat->j;
1399       for (j=0; j<bmat->nz; j++) {
1400         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1401       }
1402       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1403       for (j=0; j<mat->cmap.N; j++) {
1404         if (tmp2[j] > *norm) *norm = tmp2[j];
1405       }
1406       ierr = PetscFree(tmp);CHKERRQ(ierr);
1407       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1408     } else if (type == NORM_INFINITY) { /* max row norm */
1409       PetscReal ntemp = 0.0;
1410       for (j=0; j<aij->A->rmap.n; j++) {
1411         v = amat->a + amat->i[j];
1412         sum = 0.0;
1413         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1414           sum += PetscAbsScalar(*v); v++;
1415         }
1416         v = bmat->a + bmat->i[j];
1417         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1418           sum += PetscAbsScalar(*v); v++;
1419         }
1420         if (sum > ntemp) ntemp = sum;
1421       }
1422       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1423     } else {
1424       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1425     }
1426   }
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatTranspose_MPIAIJ"
1432 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1433 {
1434   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1435   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1436   PetscErrorCode ierr;
1437   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1438   Mat            B;
1439   PetscScalar    *array;
1440 
1441   PetscFunctionBegin;
1442   if (!matout && M != N) {
1443     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1444   }
1445 
1446   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1447   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1448   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1449   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1450 
1451   /* copy over the A part */
1452   Aloc = (Mat_SeqAIJ*)a->A->data;
1453   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1454   row = A->rmap.rstart;
1455   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
1456   for (i=0; i<m; i++) {
1457     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1458     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1459   }
1460   aj = Aloc->j;
1461   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}
1462 
1463   /* copy over the B part */
1464   Aloc = (Mat_SeqAIJ*)a->B->data;
1465   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1466   row  = A->rmap.rstart;
1467   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1468   ct   = cols;
1469   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1470   for (i=0; i<m; i++) {
1471     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1472     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1473   }
1474   ierr = PetscFree(ct);CHKERRQ(ierr);
1475   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1476   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1477   if (matout) {
1478     *matout = B;
1479   } else {
1480     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1487 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1488 {
1489   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1490   Mat            a = aij->A,b = aij->B;
1491   PetscErrorCode ierr;
1492   PetscInt       s1,s2,s3;
1493 
1494   PetscFunctionBegin;
1495   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1496   if (rr) {
1497     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1498     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1499     /* Overlap communication with computation. */
1500     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1501   }
1502   if (ll) {
1503     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1504     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1505     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1506   }
1507   /* scale the diagonal block */
1508   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1509 
1510   if (rr) {
1511     /* Do a scatter end and then right scale the off-diagonal block */
1512     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1513     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1514   }
1515 
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1521 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1522 {
1523   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1524   PetscErrorCode ierr;
1525 
1526   PetscFunctionBegin;
1527   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1528   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1533 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1534 {
1535   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1536   PetscErrorCode ierr;
1537 
1538   PetscFunctionBegin;
1539   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatEqual_MPIAIJ"
1545 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1546 {
1547   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1548   Mat            a,b,c,d;
1549   PetscTruth     flg;
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   a = matA->A; b = matA->B;
1554   c = matB->A; d = matB->B;
1555 
1556   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1557   if (flg) {
1558     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1559   }
1560   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 #undef __FUNCT__
1565 #define __FUNCT__ "MatCopy_MPIAIJ"
1566 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1567 {
1568   PetscErrorCode ierr;
1569   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1570   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1571 
1572   PetscFunctionBegin;
1573   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1574   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1575     /* Because of the column compression in the off-processor part of the matrix a->B,
1576        the number of columns in a->B and b->B may differ, hence we cannot call
1577        MatCopy() directly on the two parts. If need be, a copy more efficient than
1578        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1579        and then copying the submatrices */
1580     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1581   } else {
1582     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1583     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1584   }
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1590 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1591 {
1592   PetscErrorCode ierr;
1593 
1594   PetscFunctionBegin;
1595   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 #include "petscblaslapack.h"
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatAXPY_MPIAIJ"
1602 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1603 {
1604   PetscErrorCode ierr;
1605   PetscInt       i;
1606   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1607   PetscBLASInt   bnz,one=1;
1608   Mat_SeqAIJ     *x,*y;
1609 
1610   PetscFunctionBegin;
1611   if (str == SAME_NONZERO_PATTERN) {
1612     PetscScalar alpha = a;
1613     x = (Mat_SeqAIJ *)xx->A->data;
1614     y = (Mat_SeqAIJ *)yy->A->data;
1615     bnz = (PetscBLASInt)x->nz;
1616     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1617     x = (Mat_SeqAIJ *)xx->B->data;
1618     y = (Mat_SeqAIJ *)yy->B->data;
1619     bnz = (PetscBLASInt)x->nz;
1620     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1621   } else if (str == SUBSET_NONZERO_PATTERN) {
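    /* X's nonzero pattern is a subset of Y's: the diagonal blocks are handled by MatAXPY_SeqAIJ(),
       while for the off-diagonal blocks an xtoy map (built here, or reused if it was already built
       for this X) sends each entry of x->a to its slot in y->a before accumulating y += a*x */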
1622     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1623 
1624     x = (Mat_SeqAIJ *)xx->B->data;
1625     y = (Mat_SeqAIJ *)yy->B->data;
1626     if (y->xtoy && y->XtoY != xx->B) {
1627       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1628       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1629     }
1630     if (!y->xtoy) { /* get xtoy */
1631       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1632       y->XtoY = xx->B;
1633     }
1634     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1635   } else {
1636     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1637   }
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1642 
1643 #undef __FUNCT__
1644 #define __FUNCT__ "MatConjugate_MPIAIJ"
1645 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1646 {
1647 #if defined(PETSC_USE_COMPLEX)
1648   PetscErrorCode ierr;
1649   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1650 
1651   PetscFunctionBegin;
1652   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1653   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1654 #else
1655   PetscFunctionBegin;
1656 #endif
1657   PetscFunctionReturn(0);
1658 }
1659 
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatRealPart_MPIAIJ"
1662 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1663 {
1664   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1665   PetscErrorCode ierr;
1666 
1667   PetscFunctionBegin;
1668   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1669   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1675 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1676 {
1677   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1678   PetscErrorCode ierr;
1679 
1680   PetscFunctionBegin;
1681   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1682   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #ifdef PETSC_HAVE_PBGL
1687 #include <boost/parallel/mpi/bsp_process_group.hpp>
1688 typedef boost::parallel::mpi::bsp_process_group            process_group_type;
1689 
1690 #include <boost/graph/distributed/adjacency_list.hpp>
1691 #include <boost/parallel/mpi/bsp_process_group.hpp>
1692 
1693 #include <boost/graph/distributed/petsc/interface.hpp>
1694 #include <boost/graph/distributed/ilu_0.hpp>
1695 
1696 namespace petsc = boost::distributed::petsc;
1697 using namespace std;
1698 typedef double                                             value_type;
1699 typedef boost::graph::distributed::ilu_elimination_state   elimination_state;
1700 typedef boost::adjacency_list<boost::listS,
1701                        boost::distributedS<process_group_type, boost::vecS>,
1702                        boost::bidirectionalS,
1703                        // Vertex properties
1704                        boost::no_property,
1705                        // Edge properties
1706                        boost::property<boost::edge_weight_t, value_type,
1707                          boost::property<boost::edge_finished_t, elimination_state> > > graph_type;
1708 
1709 typedef boost::graph_traits<graph_type>::vertex_descriptor        vertex_type;
1710 typedef boost::graph_traits<graph_type>::edge_descriptor          edge_type;
1711 typedef boost::property_map<graph_type, boost::edge_weight_t>::type      weight_map_type;
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1715 /*
1716   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1717 */
1718 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1719 {
1720   Mat_MPIAIJ          *a = (Mat_MPIAIJ *) A->data;
1721   PetscTruth           row_identity, col_identity;
1722   PetscObjectContainer c;
1723   PetscInt             m, n, M, N;
1724   PetscErrorCode       ierr;
1725 
1726   PetscFunctionBegin;
1727   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ILU");
1728   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1729   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1730   if (!row_identity || !col_identity) {
1731     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1732   }
1733 
1734   process_group_type pg;
1735   graph_type*        graph_p = new graph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1736   graph_type&        graph   = *graph_p;
1737   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1738 
1739   //write_graphviz("petsc_matrix_as_graph.dot", graph, default_writer(), matrix_graph_writer<graph_type>(graph));
1740   boost::property_map<graph_type, boost::edge_finished_t>::type finished = get(boost::edge_finished, graph);
1741   BGL_FORALL_EDGES(e, graph, graph_type)
1742     put(finished, e, boost::graph::distributed::unseen);
1743 
1744   ilu_0(graph, get(boost::edge_weight, graph), get(boost::edge_finished, graph));
1745 
1746   /* put together the new matrix */
1747   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1748   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1749   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1750   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1751   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1752   (*fact)->factor = FACTOR_LU;
1753 
1754   ierr = PetscObjectContainerCreate(A->comm, &c);CHKERRQ(ierr);
1755   ierr = PetscObjectContainerSetPointer(c, graph_p);CHKERRQ(ierr);
1756   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1762 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1763 {
1764   PetscFunctionBegin;
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 #undef __FUNCT__
1769 #define __FUNCT__ "MatSolve_MPIAIJ"
1770 /*
1771   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1772 */
1773 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1774 {
1775   graph_type*          graph_p;
1776   PetscObjectContainer c;
1777   PetscErrorCode       ierr;
1778 
1779   PetscFunctionBegin;
1780   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1781   ierr = PetscObjectContainerGetPointer(c, (void **) &graph_p);CHKERRQ(ierr);
1782   ierr = VecCopy(b, x); CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 #endif
1786 
1787 /* -------------------------------------------------------------------*/
1788 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1789        MatGetRow_MPIAIJ,
1790        MatRestoreRow_MPIAIJ,
1791        MatMult_MPIAIJ,
1792 /* 4*/ MatMultAdd_MPIAIJ,
1793        MatMultTranspose_MPIAIJ,
1794        MatMultTransposeAdd_MPIAIJ,
1795 #ifdef PETSC_HAVE_PBGL
1796        MatSolve_MPIAIJ,
1797 #else
1798        0,
1799 #endif
1800        0,
1801        0,
1802 /*10*/ 0,
1803        0,
1804        0,
1805        MatRelax_MPIAIJ,
1806        MatTranspose_MPIAIJ,
1807 /*15*/ MatGetInfo_MPIAIJ,
1808        MatEqual_MPIAIJ,
1809        MatGetDiagonal_MPIAIJ,
1810        MatDiagonalScale_MPIAIJ,
1811        MatNorm_MPIAIJ,
1812 /*20*/ MatAssemblyBegin_MPIAIJ,
1813        MatAssemblyEnd_MPIAIJ,
1814        0,
1815        MatSetOption_MPIAIJ,
1816        MatZeroEntries_MPIAIJ,
1817 /*25*/ MatZeroRows_MPIAIJ,
1818        0,
1819 #ifdef PETSC_HAVE_PBGL
1820        MatLUFactorNumeric_MPIAIJ,
1821 #else
1822        0,
1823 #endif
1824        0,
1825        0,
1826 /*30*/ MatSetUpPreallocation_MPIAIJ,
1827 #ifdef PETSC_HAVE_PBGL
1828        MatILUFactorSymbolic_MPIAIJ,
1829 #else
1830        0,
1831 #endif
1832        0,
1833        0,
1834        0,
1835 /*35*/ MatDuplicate_MPIAIJ,
1836        0,
1837        0,
1838        0,
1839        0,
1840 /*40*/ MatAXPY_MPIAIJ,
1841        MatGetSubMatrices_MPIAIJ,
1842        MatIncreaseOverlap_MPIAIJ,
1843        MatGetValues_MPIAIJ,
1844        MatCopy_MPIAIJ,
1845 /*45*/ 0,
1846        MatScale_MPIAIJ,
1847        0,
1848        0,
1849        0,
1850 /*50*/ MatSetBlockSize_MPIAIJ,
1851        0,
1852        0,
1853        0,
1854        0,
1855 /*55*/ MatFDColoringCreate_MPIAIJ,
1856        0,
1857        MatSetUnfactored_MPIAIJ,
1858        MatPermute_MPIAIJ,
1859        0,
1860 /*60*/ MatGetSubMatrix_MPIAIJ,
1861        MatDestroy_MPIAIJ,
1862        MatView_MPIAIJ,
1863        0,
1864        0,
1865 /*65*/ 0,
1866        0,
1867        0,
1868        0,
1869        0,
1870 /*70*/ 0,
1871        0,
1872        MatSetColoring_MPIAIJ,
1873 #if defined(PETSC_HAVE_ADIC)
1874        MatSetValuesAdic_MPIAIJ,
1875 #else
1876        0,
1877 #endif
1878        MatSetValuesAdifor_MPIAIJ,
1879 /*75*/ 0,
1880        0,
1881        0,
1882        0,
1883        0,
1884 /*80*/ 0,
1885        0,
1886        0,
1887        0,
1888 /*84*/ MatLoad_MPIAIJ,
1889        0,
1890        0,
1891        0,
1892        0,
1893        0,
1894 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1895        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1896        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1897        MatPtAP_Basic,
1898        MatPtAPSymbolic_MPIAIJ,
1899 /*95*/ MatPtAPNumeric_MPIAIJ,
1900        0,
1901        0,
1902        0,
1903        0,
1904 /*100*/0,
1905        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1906        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1907        MatConjugate_MPIAIJ,
1908        0,
1909 /*105*/MatSetValuesRow_MPIAIJ,
1910        MatRealPart_MPIAIJ,
1911        MatImaginaryPart_MPIAIJ};
1912 
1913 /* ----------------------------------------------------------------------------------------*/
1914 
1915 EXTERN_C_BEGIN
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1918 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1919 {
1920   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1921   PetscErrorCode ierr;
1922 
1923   PetscFunctionBegin;
1924   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1925   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1926   PetscFunctionReturn(0);
1927 }
1928 EXTERN_C_END
1929 
1930 EXTERN_C_BEGIN
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1933 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1934 {
1935   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1936   PetscErrorCode ierr;
1937 
1938   PetscFunctionBegin;
1939   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1940   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1941   PetscFunctionReturn(0);
1942 }
1943 EXTERN_C_END
1944 
1945 #include "petscpc.h"
1946 EXTERN_C_BEGIN
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1949 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1950 {
1951   Mat_MPIAIJ     *b;
1952   PetscErrorCode ierr;
1953   PetscInt       i;
1954 
1955   PetscFunctionBegin;
1956   B->preallocated = PETSC_TRUE;
1957   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1958   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1959   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1960   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1961 
1962   B->rmap.bs = B->cmap.bs = 1;
1963   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1964   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1965   if (d_nnz) {
1966     for (i=0; i<B->rmap.n; i++) {
1967       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1968     }
1969   }
1970   if (o_nnz) {
1971     for (i=0; i<B->rmap.n; i++) {
1972       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1973     }
1974   }
1975   b = (Mat_MPIAIJ*)B->data;
1976 
1977   /* Explicitly create 2 MATSEQAIJ matrices. */
1978   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1979   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1980   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1981   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1982   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1983   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1984   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1985   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1986 
1987   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1988   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1989 
1990   PetscFunctionReturn(0);
1991 }
1992 EXTERN_C_END
1993 
1994 #undef __FUNCT__
1995 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1996 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1997 {
1998   Mat            mat;
1999   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2000   PetscErrorCode ierr;
2001 
2002   PetscFunctionBegin;
2003   *newmat       = 0;
2004   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2005   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2006   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2007   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2008   a    = (Mat_MPIAIJ*)mat->data;
2009 
2010   mat->factor       = matin->factor;
2011   mat->rmap.bs      = matin->rmap.bs;
2012   mat->assembled    = PETSC_TRUE;
2013   mat->insertmode   = NOT_SET_VALUES;
2014   mat->preallocated = PETSC_TRUE;
2015 
2016   a->size           = oldmat->size;
2017   a->rank           = oldmat->rank;
2018   a->donotstash     = oldmat->donotstash;
2019   a->roworiented    = oldmat->roworiented;
2020   a->rowindices     = 0;
2021   a->rowvalues      = 0;
2022   a->getrowactive   = PETSC_FALSE;
2023 
2024   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2025   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2026 
2027   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2028   if (oldmat->colmap) {
2029 #if defined (PETSC_USE_CTABLE)
2030     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2031 #else
2032     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2033     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2034     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2035 #endif
2036   } else a->colmap = 0;
2037   if (oldmat->garray) {
2038     PetscInt len;
2039     len  = oldmat->B->cmap.n;
2040     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2041     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2042     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2043   } else a->garray = 0;
2044 
2045   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2046   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2047   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2048   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2049   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2050   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2051   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2052   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2053   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2054   *newmat = mat;
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 #include "petscsys.h"
2059 
2060 #undef __FUNCT__
2061 #define __FUNCT__ "MatLoad_MPIAIJ"
2062 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2063 {
2064   Mat            A;
2065   PetscScalar    *vals,*svals;
2066   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2067   MPI_Status     status;
2068   PetscErrorCode ierr;
2069   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2070   PetscInt       i,nz,j,rstart,rend,mmax;
2071   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2072   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2073   PetscInt       cend,cstart,n,*rowners;
2074   int            fd;
2075 
2076   PetscFunctionBegin;
2077   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2078   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2079   if (!rank) {
2080     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2081     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2082     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2083   }
2084 
2085   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2086   M = header[1]; N = header[2];
2087   /* determine ownership of all rows: each process gets M/size rows, with the first M%size processes receiving one extra row */
2088   m    = M/size + ((M % size) > rank);
2089   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2090   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2091 
2092   /* First process needs enough room for process with most rows */
2093   if (!rank) {
2094     mmax       = rowners[1];
2095     for (i=2; i<size; i++) {
2096       mmax = PetscMax(mmax,rowners[i]);
2097     }
2098   } else mmax = m;
2099 
2100   rowners[0] = 0;
2101   for (i=2; i<=size; i++) {
2102     mmax       = PetscMax(mmax,rowners[i]);
2103     rowners[i] += rowners[i-1];
2104   }
2105   rstart = rowners[rank];
2106   rend   = rowners[rank+1];
2107 
2108   /* distribute row lengths to all processors */
2109   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2110   if (!rank) {
2111     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2112     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2113     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2114     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2115     for (j=0; j<m; j++) {
2116       procsnz[0] += ourlens[j];
2117     }
2118     for (i=1; i<size; i++) {
2119       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2120       /* calculate the number of nonzeros on each processor */
2121       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2122         procsnz[i] += rowlengths[j];
2123       }
2124       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2125     }
2126     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2127   } else {
2128     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2129   }
2130 
2131   if (!rank) {
2132     /* determine max buffer needed and allocate it */
2133     maxnz = 0;
2134     for (i=0; i<size; i++) {
2135       maxnz = PetscMax(maxnz,procsnz[i]);
2136     }
2137     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2138 
2139     /* read in my part of the matrix column indices  */
2140     nz   = procsnz[0];
2141     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2142     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2143 
2144     /* read in everyone else's part and ship it off */
2145     for (i=1; i<size; i++) {
2146       nz   = procsnz[i];
2147       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2148       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2149     }
2150     ierr = PetscFree(cols);CHKERRQ(ierr);
2151   } else {
2152     /* determine buffer space needed for message */
2153     nz = 0;
2154     for (i=0; i<m; i++) {
2155       nz += ourlens[i];
2156     }
2157     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2158 
2159     /* receive message of column indices */
2160     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2161     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2162     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2163   }
2164 
2165   /* determine column ownership if matrix is not square */
2166   if (N != M) {
2167     n      = N/size + ((N % size) > rank);
2168     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2169     cstart = cend - n;
2170   } else {
2171     cstart = rstart;
2172     cend   = rend;
2173     n      = cend - cstart;
2174   }
2175 
2176   /* loop over local rows, determining number of off-diagonal entries */
2177   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2178   jj = 0;
2179   for (i=0; i<m; i++) {
2180     for (j=0; j<ourlens[i]; j++) {
2181       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2182       jj++;
2183     }
2184   }
2185 
2186   /* create our matrix */
2187   for (i=0; i<m; i++) {
2188     ourlens[i] -= offlens[i];
2189   }
2190   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2191   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2192   ierr = MatSetType(A,type);CHKERRQ(ierr);
2193   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2194 
2195   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2196   for (i=0; i<m; i++) {
2197     ourlens[i] += offlens[i];
2198   }
2199 
2200   if (!rank) {
2201     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2202 
2203     /* read in my part of the matrix numerical values  */
2204     nz   = procsnz[0];
2205     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2206 
2207     /* insert into matrix */
2208     jj      = rstart;
2209     smycols = mycols;
2210     svals   = vals;
2211     for (i=0; i<m; i++) {
2212       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2213       smycols += ourlens[i];
2214       svals   += ourlens[i];
2215       jj++;
2216     }
2217 
2218     /* read in other processors and ship out */
2219     for (i=1; i<size; i++) {
2220       nz   = procsnz[i];
2221       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2222       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2223     }
2224     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2225   } else {
2226     /* receive numeric values */
2227     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2228 
2229     /* receive message of values */
2230     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2231     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2232     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2233 
2234     /* insert into matrix */
2235     jj      = rstart;
2236     smycols = mycols;
2237     svals   = vals;
2238     for (i=0; i<m; i++) {
2239       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2240       smycols += ourlens[i];
2241       svals   += ourlens[i];
2242       jj++;
2243     }
2244   }
2245   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2246   ierr = PetscFree(vals);CHKERRQ(ierr);
2247   ierr = PetscFree(mycols);CHKERRQ(ierr);
2248   ierr = PetscFree(rowners);CHKERRQ(ierr);
2249 
2250   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2251   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2252   *newmat = A;
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2258 /*
2259     Not great since it makes two copies of the submatrix: first a SeqAIJ copy of the local
2260   piece, and then the end result obtained by concatenating the local matrices.
2261   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2262 */
2263 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2264 {
2265   PetscErrorCode ierr;
2266   PetscMPIInt    rank,size;
2267   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2268   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2269   Mat            *local,M,Mreuse;
2270   PetscScalar    *vwork,*aa;
2271   MPI_Comm       comm = mat->comm;
2272   Mat_SeqAIJ     *aij;
2273 
2274 
2275   PetscFunctionBegin;
2276   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2277   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2278 
2279   if (call ==  MAT_REUSE_MATRIX) {
2280     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2281     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2282     local = &Mreuse;
2283     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2284   } else {
2285     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2286     Mreuse = *local;
2287     ierr   = PetscFree(local);CHKERRQ(ierr);
2288   }
2289 
2290   /*
2291       m - number of local rows
2292       n - number of columns (same on all processors)
2293       rstart - first row in new global matrix generated
2294   */
2295   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2296   if (call == MAT_INITIAL_MATRIX) {
2297     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2298     ii  = aij->i;
2299     jj  = aij->j;
2300 
2301     /*
2302         Determine the number of non-zeros in the diagonal and off-diagonal
2303         portions of the matrix in order to do correct preallocation
2304     */
2305 
2306     /* first get start and end of "diagonal" columns */
2307     if (csize == PETSC_DECIDE) {
2308       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2309       if (mglobal == n) { /* square matrix */
2310         nlocal = m;
2311       } else {
2312         nlocal = n/size + ((n % size) > rank);
2313       }
2314     } else {
2315       nlocal = csize;
2316     }
2317     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2318     rstart = rend - nlocal;
2319     if (rank == size - 1 && rend != n) {
2320       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2321     }
2322 
2323     /* next, compute all the lengths */
2324     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2325     olens = dlens + m;
2326     for (i=0; i<m; i++) {
2327       jend = ii[i+1] - ii[i];
2328       olen = 0;
2329       dlen = 0;
2330       for (j=0; j<jend; j++) {
2331         if (*jj < rstart || *jj >= rend) olen++;
2332         else dlen++;
2333         jj++;
2334       }
2335       olens[i] = olen;
2336       dlens[i] = dlen;
2337     }
2338     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2339     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2340     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2341     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2342     ierr = PetscFree(dlens);CHKERRQ(ierr);
2343   } else {
2344     PetscInt ml,nl;
2345 
2346     M = *newmat;
2347     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2348     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2349     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2350     /*
2351          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2352        rather than the slower MatSetValues().
2353     */
2354     M->was_assembled = PETSC_TRUE;
2355     M->assembled     = PETSC_FALSE;
2356   }
2357   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2358   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2359   ii  = aij->i;
2360   jj  = aij->j;
2361   aa  = aij->a;
2362   for (i=0; i<m; i++) {
2363     row   = rstart + i;
2364     nz    = ii[i+1] - ii[i];
2365     cwork = jj;     jj += nz;
2366     vwork = aa;     aa += nz;
2367     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2368   }
2369 
2370   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2372   *newmat = M;
2373 
2374   /* save submatrix used in processor for next request */
2375   if (call ==  MAT_INITIAL_MATRIX) {
2376     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2377     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2378   }
2379 
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 EXTERN_C_BEGIN
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2386 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2387 {
2388   PetscInt       m,cstart, cend,j,nnz,i,d;
2389   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2390   const PetscInt *JJ;
2391   PetscScalar    *values;
2392   PetscErrorCode ierr;
2393 
2394   PetscFunctionBegin;
2395   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
2396 
2397   B->rmap.bs = B->cmap.bs = 1;
2398   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2399   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2400   m      = B->rmap.n;
2401   cstart = B->cmap.rstart;
2402   cend   = B->cmap.rend;
2403   rstart = B->rmap.rstart;
2404 
2405   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2406   o_nnz = d_nnz + m;
2407 
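  /* For each local row count the entries whose global column lies in [cstart,cend) (the diagonal
     block) versus outside it (the off-diagonal block); the column indices in J are assumed to be
     sorted within each row */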
2408   for (i=0; i<m; i++) {
2409     nnz     = Ii[i+1]- Ii[i];
2410     JJ      = J + Ii[i];
2411     nnz_max = PetscMax(nnz_max,nnz);
2412     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2413     for (j=0; j<nnz; j++) {
2414       if (*JJ >= cstart) break;
2415       JJ++;
2416     }
2417     d = 0;
2418     for (; j<nnz; j++) {
2419       if (*JJ++ >= cend) break;
2420       d++;
2421     }
2422     d_nnz[i] = d;
2423     o_nnz[i] = nnz - d;
2424   }
2425   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2426   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2427 
2428   if (v) values = (PetscScalar*)v;
2429   else {
2430     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2431     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2432   }
2433 
2434   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2435   for (i=0; i<m; i++) {
2436     ii   = i + rstart;
2437     nnz  = Ii[i+1]- Ii[i];
2438     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2439   }
2440   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2441   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2442   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2443 
2444   if (!v) {
2445     ierr = PetscFree(values);CHKERRQ(ierr);
2446   }
2447   PetscFunctionReturn(0);
2448 }
2449 EXTERN_C_END
2450 
2451 #undef __FUNCT__
2452 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2453 /*@
2454    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2455    (the default parallel PETSc format).
2456 
2457    Collective on MPI_Comm
2458 
2459    Input Parameters:
2460 +  B - the matrix
2461 .  i - the indices into j for the start of each local row (starts with zero)
2462 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2463 -  v - optional values in the matrix
2464 
2465    Level: developer
2466 
2467    Notes: this routine copies the values from i[], j[], and v[] into PETSc's internal
2468      storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2469 
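   Example (a minimal sketch with purely illustrative values; it assumes B has already been
   created with MatSetSizes()/MatSetType() and that this process owns two rows):
.vb
   PetscInt    i[3] = {0,2,4};             /* row r occupies entries i[r] to i[r+1]-1 of j[] and v[] */
   PetscInt    j[4] = {0,5,1,6};           /* global column indices, sorted within each row */
   PetscScalar v[4] = {1.0,2.0,3.0,4.0};
   ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
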
2470 .keywords: matrix, aij, compressed row, sparse, parallel
2471 
2472 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2473           MatCreateSeqAIJWithArrays()
2474 @*/
2475 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2476 {
2477   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2478 
2479   PetscFunctionBegin;
2480   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2481   if (f) {
2482     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2483   }
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 #undef __FUNCT__
2488 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2489 /*@C
2490    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2491    (the default parallel PETSc format).  For good matrix assembly performance
2492    the user should preallocate the matrix storage by setting the parameters
2493    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2494    performance can be increased by more than a factor of 50.
2495 
2496    Collective on MPI_Comm
2497 
2498    Input Parameters:
2499 +  A - the matrix
2500 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2501            (same value is used for all local rows)
2502 .  d_nnz - array containing the number of nonzeros in the various rows of the
2503            DIAGONAL portion of the local submatrix (possibly different for each row)
2504            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2505            The size of this array is equal to the number of local rows, i.e 'm'.
2506            You must leave room for the diagonal entry even if it is zero.
2507 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2508            submatrix (same value is used for all local rows).
2509 -  o_nnz - array containing the number of nonzeros in the various rows of the
2510            OFF-DIAGONAL portion of the local submatrix (possibly different for
2511            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2512            structure. The size of this array is equal to the number
2513            of local rows, i.e 'm'.
2514 
2515    If the *_nnz parameter is given then the *_nz parameter is ignored
2516 
2517    The AIJ format (also called the Yale sparse matrix format or
2518    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2519    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2520 
2521    The parallel matrix is partitioned such that the first m0 rows belong to
2522    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2523    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2524 
2525    The DIAGONAL portion of the local submatrix of a processor can be defined
2526    as the submatrix obtained by extracting the part corresponding
2527    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2528    first row that belongs to the processor, and r2 is the last row belonging
2529    to this processor. This is a square mxm matrix. The remaining portion
2530    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2531 
2532    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2533 
2534    Example usage:
2535 
2536    Consider the following 8x8 matrix with 34 non-zero values, that is
2537    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2538    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2539    as follows:
2540 
2541 .vb
2542             1  2  0  |  0  3  0  |  0  4
2543     Proc0   0  5  6  |  7  0  0  |  8  0
2544             9  0 10  | 11  0  0  | 12  0
2545     -------------------------------------
2546            13  0 14  | 15 16 17  |  0  0
2547     Proc1   0 18  0  | 19 20 21  |  0  0
2548             0  0  0  | 22 23  0  | 24  0
2549     -------------------------------------
2550     Proc2  25 26 27  |  0  0 28  | 29  0
2551            30  0  0  | 31 32 33  |  0 34
2552 .ve
2553 
2554    This can be represented as a collection of submatrices as:
2555 
2556 .vb
2557       A B C
2558       D E F
2559       G H I
2560 .ve
2561 
2562    Where the submatrices A,B,C are owned by proc0, D,E,F are
2563    owned by proc1, G,H,I are owned by proc2.
2564 
2565    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2566    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2567    The 'M','N' parameters are 8,8, and have the same values on all procs.
2568 
2569    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2570    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2571    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2572    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2573    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2574    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2575    matrix, and [DF] as another SeqAIJ matrix.
2576    When d_nz, o_nz parameters are specified, d_nz storage elements are
2577    allocated for every row of the local diagonal submatrix, and o_nz
2578    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2579    One way to choose d_nz and o_nz is to use the max nonzerors per local
2580    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
2581    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2582 .vb
2583      proc0 : dnz = 2, o_nz = 2
2584      proc1 : dnz = 3, o_nz = 2
2585      proc2 : dnz = 1, o_nz = 4
2586 .ve
2587    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2588    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2589    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2590    34 values.
2591 
2592    When d_nnz, o_nnz parameters are specified, the storage is specified
2593    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2594    In the above case the values for d_nnz,o_nnz are:
2595 .vb
2596      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2597      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2598      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2599 .ve
2600    Here the space allocated is the sum of all the above values, i.e. 34, and
2601    hence pre-allocation is perfect.
2602 
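   A matching call on proc0 (a minimal sketch; the arrays mirror the d_nnz/o_nnz values listed
   above, and A is assumed to be an already created 8x8 MPIAIJ matrix with 3 local rows):
.vb
   PetscInt d_nnz[3] = {2,2,2};
   PetscInt o_nnz[3] = {2,2,2};
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
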
2603    Level: intermediate
2604 
2605 .keywords: matrix, aij, compressed row, sparse, parallel
2606 
2607 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2608           MPIAIJ
2609 @*/
2610 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2611 {
2612   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2613 
2614   PetscFunctionBegin;
2615   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2616   if (f) {
2617     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2618   }
2619   PetscFunctionReturn(0);
2620 }
2621 
2622 #undef __FUNCT__
2623 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
2624 /*@C
2625      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
2626          in standard CSR format.
2627 
2628    Collective on MPI_Comm
2629 
2630    Input Parameters:
2631 +  comm - MPI communicator
2632 .  m - number of local rows (Cannot be PETSC_DECIDE)
2633 .  n - This value should be the same as the local size used in creating the
2634        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2635        calculated if N is given) For square matrices n is almost always m.
2636 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2637 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2638 .   i - row indices
2639 .   j - column indices
2640 -   a - matrix values
2641 
2642    Output Parameter:
2643 .   mat - the matrix
2644    Level: intermediate
2645 
2646    Notes:
2647        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2648      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2649      called this routine.
2650 
2651        The i and j indices are 0 based
2652 
2653 .keywords: matrix, aij, compressed row, sparse, parallel
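   Example (a minimal sketch with purely illustrative values, assuming two processes that each
   contribute two rows of a 4x4 matrix, and that Mat mat and PetscErrorCode ierr are declared):
.vb
   PetscInt    i[3] = {0,2,4};
   PetscInt    j[4] = {0,1,1,2};           /* global column indices, 0 based and sorted per row */
   PetscScalar a[4] = {1.0,2.0,3.0,4.0};
   ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);CHKERRQ(ierr);
.ve
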
2654 
2655 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2656           MPIAIJ, MatCreateMPIAIJ()
2657 @*/
2658 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2659 {
2660   PetscErrorCode ierr;
2661 
2662   PetscFunctionBegin;
2663   if (i[0]) {
2664     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2665   }
2666   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2667   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2668   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
2669   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
2670   PetscFunctionReturn(0);
2671 }
2672 
2673 #undef __FUNCT__
2674 #define __FUNCT__ "MatCreateMPIAIJ"
2675 /*@C
2676    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2677    (the default parallel PETSc format).  For good matrix assembly performance
2678    the user should preallocate the matrix storage by setting the parameters
2679    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2680    performance can be increased by more than a factor of 50.
2681 
2682    Collective on MPI_Comm
2683 
2684    Input Parameters:
2685 +  comm - MPI communicator
2686 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2687            This value should be the same as the local size used in creating the
2688            y vector for the matrix-vector product y = Ax.
2689 .  n - This value should be the same as the local size used in creating the
2690        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2691        calculated if N is given) For square matrices n is almost always m.
2692 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2693 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2694 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2695            (same value is used for all local rows)
2696 .  d_nnz - array containing the number of nonzeros in the various rows of the
2697            DIAGONAL portion of the local submatrix (possibly different for each row)
2698            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2699            The size of this array is equal to the number of local rows, i.e 'm'.
2700            You must leave room for the diagonal entry even if it is zero.
2701 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2702            submatrix (same value is used for all local rows).
2703 -  o_nnz - array containing the number of nonzeros in the various rows of the
2704            OFF-DIAGONAL portion of the local submatrix (possibly different for
2705            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2706            structure. The size of this array is equal to the number
2707            of local rows, i.e 'm'.
2708 
2709    Output Parameter:
2710 .  A - the matrix
2711 
2712    Notes:
2713    If the *_nnz parameter is given then the *_nz parameter is ignored
2714 
2715    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2716    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2717    storage requirements for this matrix.
2718 
2719    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2720    processor than it must be used on all processors that share the object for
2721    that argument.
2722 
2723    The user MUST specify either the local or global matrix dimensions
2724    (possibly both).
2725 
2726    The parallel matrix is partitioned such that the first m0 rows belong to
2727    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2728    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2729 
2730    The DIAGONAL portion of the local submatrix of a processor can be defined
2731    as the submatrix obtained by extracting the part corresponding
2732    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2733    first row that belongs to the processor, and r2 is the last row belonging
2734    to this processor. This is a square mxm matrix. The remaining portion
2735    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2736 
2737    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2738 
2739    When calling this routine with a single process communicator, a matrix of
2740    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2741    type of communicator, use the construction mechanism:
2742      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2743 
2744    By default, this format uses inodes (identical nodes) when possible.
2745    We search for consecutive rows with the same nonzero structure, thereby
2746    reusing matrix information to achieve increased efficiency.
2747 
2748    Options Database Keys:
2749 +  -mat_no_inode  - Do not use inodes
2750 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2751 -  -mat_aij_oneindex - Internally use indexing starting at 1
2752         rather than 0.  Note that when calling MatSetValues(),
2753         the user still MUST index entries starting at 0!
2754 
2755 
2756    Example usage:
2757 
2758    Consider the following 8x8 matrix with 34 non-zero values, that is
2759    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2760    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2761    as follows:
2762 
2763 .vb
2764             1  2  0  |  0  3  0  |  0  4
2765     Proc0   0  5  6  |  7  0  0  |  8  0
2766             9  0 10  | 11  0  0  | 12  0
2767     -------------------------------------
2768            13  0 14  | 15 16 17  |  0  0
2769     Proc1   0 18  0  | 19 20 21  |  0  0
2770             0  0  0  | 22 23  0  | 24  0
2771     -------------------------------------
2772     Proc2  25 26 27  |  0  0 28  | 29  0
2773            30  0  0  | 31 32 33  |  0 34
2774 .ve
2775 
2776    This can be represented as a collection of submatrices as:
2777 
2778 .vb
2779       A B C
2780       D E F
2781       G H I
2782 .ve
2783 
2784    Where the submatrices A,B,C are owned by proc0, D,E,F are
2785    owned by proc1, G,H,I are owned by proc2.
2786 
2787    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2788    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2789    The 'M','N' parameters are 8,8, and have the same values on all procs.
2790 
2791    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2792    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2793    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2794    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2795    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2796    matrix, and [DF] as another SeqAIJ matrix.
2797 
2798    When the d_nz, o_nz parameters are specified, d_nz storage elements are
2799    allocated for every row of the local diagonal submatrix, and o_nz
2800    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2801    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2802    all the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2803    In this case, the values of d_nz,o_nz are:
2804 .vb
2805      proc0 : d_nz = 2, o_nz = 2
2806      proc1 : d_nz = 3, o_nz = 2
2807      proc2 : d_nz = 1, o_nz = 4
2808 .ve
2809    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2810    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2811    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2812    34 values.
2813 
2814    When the d_nnz, o_nnz parameters are specified, the storage is specified
2815    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2816    In the above case the values for d_nnz,o_nnz are:
2817 .vb
2818      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2819      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2820      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2821 .ve
2822    Here the space allocated is the sum of all the above values, i.e. 34, and
2823    hence the pre-allocation is perfect.
2824 
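   As a rough usage sketch (not taken from an actual PETSc example; comm, A, and
   ierr are assumed to be declared by the caller), the call made by proc0 for the
   d_nnz/o_nnz variant above would look like:

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
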
2825    Level: intermediate
2826 
2827 .keywords: matrix, aij, compressed row, sparse, parallel
2828 
2829 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2830           MPIAIJ, MatCreateMPIAIJWithArrays()
2831 @*/
2832 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2833 {
2834   PetscErrorCode ierr;
2835   PetscMPIInt    size;
2836 
2837   PetscFunctionBegin;
2838   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2839   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2840   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2841   if (size > 1) {
2842     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2843     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2844   } else {
2845     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2846     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2847   }
2848   PetscFunctionReturn(0);
2849 }
2850 
2851 #undef __FUNCT__
2852 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
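/*
   MatMPIAIJGetSeqAIJ - gives access to the two local SeqAIJ blocks of an MPIAIJ
   matrix: Ad is the DIAGONAL block, Ao is the OFF-DIAGONAL block, and colmap
   returns the garray array that translates the local column indices of Ao into
   global column indices.
*/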
2853 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2854 {
2855   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2856 
2857   PetscFunctionBegin;
2858   *Ad     = a->A;
2859   *Ao     = a->B;
2860   *colmap = a->garray;
2861   PetscFunctionReturn(0);
2862 }
2863 
2864 #undef __FUNCT__
2865 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2866 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2867 {
2868   PetscErrorCode ierr;
2869   PetscInt       i;
2870   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2871 
2872   PetscFunctionBegin;
2873   if (coloring->ctype == IS_COLORING_LOCAL) {
2874     ISColoringValue *allcolors,*colors;
2875     ISColoring      ocoloring;
2876 
2877     /* set coloring for diagonal portion */
2878     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2879 
2880     /* set coloring for off-diagonal portion */
2881     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2882     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2883     for (i=0; i<a->B->cmap.n; i++) {
2884       colors[i] = allcolors[a->garray[i]];
2885     }
2886     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2887     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2888     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2889     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2890   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2891     ISColoringValue *colors;
2892     PetscInt        *larray;
2893     ISColoring      ocoloring;
2894 
2895     /* set coloring for diagonal portion */
2896     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2897     for (i=0; i<a->A->cmap.n; i++) {
2898       larray[i] = i + A->cmap.rstart;
2899     }
2900     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2901     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2902     for (i=0; i<a->A->cmap.n; i++) {
2903       colors[i] = coloring->colors[larray[i]];
2904     }
2905     ierr = PetscFree(larray);CHKERRQ(ierr);
2906     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2907     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2908     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2909 
2910     /* set coloring for off-diagonal portion */
2911     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2912     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2913     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2914     for (i=0; i<a->B->cmap.n; i++) {
2915       colors[i] = coloring->colors[larray[i]];
2916     }
2917     ierr = PetscFree(larray);CHKERRQ(ierr);
2918     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2919     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2920     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2921   } else {
2922     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2923   }
2924 
2925   PetscFunctionReturn(0);
2926 }
2927 
2928 #if defined(PETSC_HAVE_ADIC)
2929 #undef __FUNCT__
2930 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2931 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2932 {
2933   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2934   PetscErrorCode ierr;
2935 
2936   PetscFunctionBegin;
2937   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2938   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2939   PetscFunctionReturn(0);
2940 }
2941 #endif
2942 
2943 #undef __FUNCT__
2944 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2945 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2946 {
2947   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2948   PetscErrorCode ierr;
2949 
2950   PetscFunctionBegin;
2951   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2952   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2953   PetscFunctionReturn(0);
2954 }
2955 
2956 #undef __FUNCT__
2957 #define __FUNCT__ "MatMerge"
2958 /*@C
2959       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2960                  matrices from each processor
2961 
2962     Collective on MPI_Comm
2963 
2964    Input Parameters:
2965 +    comm - the communicator the parallel matrix will live on
2966 .    inmat - the input sequential matrix on each processor
2967 .    n - number of local columns (or PETSC_DECIDE)
2968 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2969 
2970    Output Parameter:
2971 .    outmat - the parallel matrix generated
2972 
2973     Level: advanced
2974 
2975    Notes: The number of columns of the matrix on EACH processor MUST be the same.
2976 
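   A minimal usage sketch (comm, seqA, and C here are assumed caller-declared
   variables, not code from a PETSc example):

.vb
     ierr = MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve

   Note that the input matrix is destroyed by this routine once C has been assembled.
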
2977 @*/
2978 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2979 {
2980   PetscErrorCode ierr;
2981   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
2982   PetscInt       *indx;
2983   PetscScalar    *values;
2984 
2985   PetscFunctionBegin;
2986   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2987   if (scall == MAT_INITIAL_MATRIX){
2988     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2989     if (n == PETSC_DECIDE){
2990       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
2991     }
2992     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2993     rstart -= m;
2994 
2995     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2996     for (i=0;i<m;i++) {
2997       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2998       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2999       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3000     }
3001     /* This routine will ONLY return MPIAIJ type matrix */
3002     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3003     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3004     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3005     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3006     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3007 
3008   } else if (scall == MAT_REUSE_MATRIX){
3009     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3010   } else {
3011     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3012   }
3013 
3014   for (i=0;i<m;i++) {
3015     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3016     Ii    = i + rstart;
3017     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3018     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3019   }
3020   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3021   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3022   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3023 
3024   PetscFunctionReturn(0);
3025 }
3026 
3027 #undef __FUNCT__
3028 #define __FUNCT__ "MatFileSplit"
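/*
   MatFileSplit - copies the local rows of the parallel matrix A into a sequential
   SeqAIJ matrix and appends that matrix, in PETSc binary format, to the file
   <outfile>.<rank> on each process.
*/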
3029 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3030 {
3031   PetscErrorCode    ierr;
3032   PetscMPIInt       rank;
3033   PetscInt          m,N,i,rstart,nnz;
3034   size_t            len;
3035   const PetscInt    *indx;
3036   PetscViewer       out;
3037   char              *name;
3038   Mat               B;
3039   const PetscScalar *values;
3040 
3041   PetscFunctionBegin;
3042   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3043   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3044   /* Should this be the type of the diagonal block of A? */
3045   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3046   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3047   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3048   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3049   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3050   for (i=0;i<m;i++) {
3051     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3052     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3053     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3054   }
3055   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3056   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3057 
3058   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3059   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3060   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3061   sprintf(name,"%s.%d",outfile,rank);
3062   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3063   ierr = PetscFree(name);CHKERRQ(ierr);
3064   ierr = MatView(B,out);CHKERRQ(ierr);
3065   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3066   ierr = MatDestroy(B);CHKERRQ(ierr);
3067   PetscFunctionReturn(0);
3068 }
3069 
3070 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3073 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3074 {
3075   PetscErrorCode       ierr;
3076   Mat_Merge_SeqsToMPI  *merge;
3077   PetscObjectContainer container;
3078 
3079   PetscFunctionBegin;
3080   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3081   if (container) {
3082     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3083     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3084     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3085     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3086     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3087     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3088     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3089     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3090     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3091     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3092     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3093     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3094 
3095     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
3096     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3097     ierr = PetscFree(merge);CHKERRQ(ierr);
3098   }
3099 
3100   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3101   PetscFunctionReturn(0);
3102 }
3103 
3104 #include "src/mat/utils/freespace.h"
3105 #include "petscbt.h"
3106 static PetscEvent logkey_seqstompinum = 0;
3107 #undef __FUNCT__
3108 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3109 /*@C
3110       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3111                  matrices from each processor
3112 
3113     Collective on MPI_Comm
3114 
3115    Input Parameters:
3116 +    comm - the communicator the parallel matrix will live on
3117 .    seqmat - the input sequential matrix on each processor
3118 .    m - number of local rows (or PETSC_DECIDE)
3119 .    n - number of local columns (or PETSC_DECIDE)
3120 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3121 
3122    Output Parameter:
3123 .    mpimat - the parallel matrix generated
3124 
3125     Level: advanced
3126 
3127    Notes:
3128      The dimensions of the sequential matrix on each processor MUST be the same.
3129      The input seqmat is placed in the container "Mat_Merge_SeqsToMPI", and will be
3130      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
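
   A rough usage sketch (comm, seqA, and C are assumed caller-declared variables,
   not code from a PETSc example):

.vb
     ierr = MatMerge_SeqsToMPI(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     /* if the values of seqA change but its nonzero pattern does not, the structure can be reused */
     ierr = MatMerge_SeqsToMPI(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
.ve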
3131 @*/
3132 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3133 {
3134   PetscErrorCode       ierr;
3135   MPI_Comm             comm=mpimat->comm;
3136   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3137   PetscMPIInt          size,rank,taga,*len_s;
3138   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3139   PetscInt             proc,m;
3140   PetscInt             **buf_ri,**buf_rj;
3141   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3142   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3143   MPI_Request          *s_waits,*r_waits;
3144   MPI_Status           *status;
3145   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3146   Mat_Merge_SeqsToMPI  *merge;
3147   PetscObjectContainer container;
3148 
3149   PetscFunctionBegin;
3150   if (!logkey_seqstompinum) {
3151     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3152   }
3153   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3154 
3155   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3156   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3157 
3158   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3159   if (container) {
3160     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3161   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing the supporting MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
3162   bi     = merge->bi;
3163   bj     = merge->bj;
3164   buf_ri = merge->buf_ri;
3165   buf_rj = merge->buf_rj;
3166 
3167   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3168   owners = merge->rowmap.range;
3169   len_s  = merge->len_s;
3170 
3171   /* send and recv matrix values */
3172   /*-----------------------------*/
3173   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3174   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3175 
3176   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3177   for (proc=0,k=0; proc<size; proc++){
3178     if (!len_s[proc]) continue;
3179     i = owners[proc];
3180     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3181     k++;
3182   }
3183 
3184   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3185   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3186   ierr = PetscFree(status);CHKERRQ(ierr);
3187 
3188   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3189   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3190 
3191   /* insert mat values of mpimat */
3192   /*----------------------------*/
3193   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3194   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3195   nextrow = buf_ri_k + merge->nrecv;
3196   nextai  = nextrow + merge->nrecv;
3197 
3198   for (k=0; k<merge->nrecv; k++){
3199     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3200     nrows = *(buf_ri_k[k]);
3201     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3202     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
3203   }
3204 
3205   /* set values of ba */
3206   m = merge->rowmap.n;
3207   for (i=0; i<m; i++) {
3208     arow = owners[rank] + i;
3209     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3210     bnzi = bi[i+1] - bi[i];
3211     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3212 
3213     /* add local non-zero vals of this proc's seqmat into ba */
3214     anzi = ai[arow+1] - ai[arow];
3215     aj   = a->j + ai[arow];
3216     aa   = a->a + ai[arow];
3217     nextaj = 0;
3218     for (j=0; nextaj<anzi; j++){
3219       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3220         ba_i[j] += aa[nextaj++];
3221       }
3222     }
3223 
3224     /* add received vals into ba */
3225     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3226       /* i-th row */
3227       if (i == *nextrow[k]) {
3228         anzi = *(nextai[k]+1) - *nextai[k];
3229         aj   = buf_rj[k] + *(nextai[k]);
3230         aa   = abuf_r[k] + *(nextai[k]);
3231         nextaj = 0;
3232         for (j=0; nextaj<anzi; j++){
3233           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3234             ba_i[j] += aa[nextaj++];
3235           }
3236         }
3237         nextrow[k]++; nextai[k]++;
3238       }
3239     }
3240     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3241   }
3242   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3243   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3244 
3245   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3246   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3247   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3248   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3249   PetscFunctionReturn(0);
3250 }
3251 
3252 static PetscEvent logkey_seqstompisym = 0;
3253 #undef __FUNCT__
3254 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3255 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3256 {
3257   PetscErrorCode       ierr;
3258   Mat                  B_mpi;
3259   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3260   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3261   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3262   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3263   PetscInt             len,proc,*dnz,*onz;
3264   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3265   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3266   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3267   MPI_Status           *status;
3268   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3269   PetscBT              lnkbt;
3270   Mat_Merge_SeqsToMPI  *merge;
3271   PetscObjectContainer container;
3272 
3273   PetscFunctionBegin;
3274   if (!logkey_seqstompisym) {
3275     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3276   }
3277   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3278 
3279   /* make sure it is a PETSc comm */
3280   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3281   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3282   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3283 
3284   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3285   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3286 
3287   /* determine row ownership */
3288   /*---------------------------------------------------------*/
3289   merge->rowmap.n = m;
3290   merge->rowmap.N = M;
3291   merge->rowmap.bs = 1;
3292   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3293   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3294   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3295 
3296   m      = merge->rowmap.n;
3297   M      = merge->rowmap.N;
3298   owners = merge->rowmap.range;
3299 
3300   /* determine the number of messages to send, their lengths */
3301   /*---------------------------------------------------------*/
3302   len_s  = merge->len_s;
3303 
3304   len = 0;  /* length of buf_si[] */
3305   merge->nsend = 0;
3306   for (proc=0; proc<size; proc++){
3307     len_si[proc] = 0;
3308     if (proc == rank){
3309       len_s[proc] = 0;
3310     } else {
3311       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3312       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3313     }
3314     if (len_s[proc]) {
3315       merge->nsend++;
3316       nrows = 0;
3317       for (i=owners[proc]; i<owners[proc+1]; i++){
3318         if (ai[i+1] > ai[i]) nrows++;
3319       }
3320       len_si[proc] = 2*(nrows+1);
3321       len += len_si[proc];
3322     }
3323   }
3324 
3325   /* determine the number and length of messages to receive for ij-structure */
3326   /*-------------------------------------------------------------------------*/
3327   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3328   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3329 
3330   /* post the Irecv of j-structure */
3331   /*-------------------------------*/
3332   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3333   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3334 
3335   /* post the Isend of j-structure */
3336   /*--------------------------------*/
3337   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3338   sj_waits = si_waits + merge->nsend;
3339 
3340   for (proc=0, k=0; proc<size; proc++){
3341     if (!len_s[proc]) continue;
3342     i = owners[proc];
3343     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3344     k++;
3345   }
3346 
3347   /* receives and sends of j-structure are complete */
3348   /*------------------------------------------------*/
3349   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3350   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3351 
3352   /* send and recv i-structure */
3353   /*---------------------------*/
3354   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3355   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3356 
3357   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3358   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3359   for (proc=0,k=0; proc<size; proc++){
3360     if (!len_s[proc]) continue;
3361     /* form outgoing message for i-structure:
3362          buf_si[0]:                 nrows to be sent
3363                [1:nrows]:           row index (global)
3364                [nrows+1:2*nrows+1]: i-structure index
3365     */
3366     /*-------------------------------------------*/
3367     nrows = len_si[proc]/2 - 1;
3368     buf_si_i    = buf_si + nrows+1;
3369     buf_si[0]   = nrows;
3370     buf_si_i[0] = 0;
3371     nrows = 0;
3372     for (i=owners[proc]; i<owners[proc+1]; i++){
3373       anzi = ai[i+1] - ai[i];
3374       if (anzi) {
3375         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3376         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3377         nrows++;
3378       }
3379     }
3380     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3381     k++;
3382     buf_si += len_si[proc];
3383   }
3384 
3385   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3386   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3387 
3388   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3389   for (i=0; i<merge->nrecv; i++){
3390     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3391   }
3392 
3393   ierr = PetscFree(len_si);CHKERRQ(ierr);
3394   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3395   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3396   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3397   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3398   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3399   ierr = PetscFree(status);CHKERRQ(ierr);
3400 
3401   /* compute a local seq matrix in each processor */
3402   /*----------------------------------------------*/
3403   /* allocate bi array and free space for accumulating nonzero column info */
3404   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3405   bi[0] = 0;
3406 
3407   /* create and initialize a linked list */
3408   nlnk = N+1;
3409   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3410 
3411   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3412   len = 0;
3413   len  = ai[owners[rank+1]] - ai[owners[rank]];
3414   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3415   current_space = free_space;
3416 
3417   /* determine symbolic info for each local row */
3418   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3419   nextrow = buf_ri_k + merge->nrecv;
3420   nextai  = nextrow + merge->nrecv;
3421   for (k=0; k<merge->nrecv; k++){
3422     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3423     nrows = *buf_ri_k[k];
3424     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3425     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
3426   }
3427 
3428   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3429   len = 0;
3430   for (i=0;i<m;i++) {
3431     bnzi   = 0;
3432     /* add local non-zero cols of this proc's seqmat into lnk */
3433     arow   = owners[rank] + i;
3434     anzi   = ai[arow+1] - ai[arow];
3435     aj     = a->j + ai[arow];
3436     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3437     bnzi += nlnk;
3438     /* add received col data into lnk */
3439     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3440       if (i == *nextrow[k]) { /* i-th row */
3441         anzi = *(nextai[k]+1) - *nextai[k];
3442         aj   = buf_rj[k] + *nextai[k];
3443         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3444         bnzi += nlnk;
3445         nextrow[k]++; nextai[k]++;
3446       }
3447     }
3448     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3449 
3450     /* if free space is not available, make more free space */
3451     if (current_space->local_remaining<bnzi) {
3452       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3453       nspacedouble++;
3454     }
3455     /* copy data into free space, then initialize lnk */
3456     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3457     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3458 
3459     current_space->array           += bnzi;
3460     current_space->local_used      += bnzi;
3461     current_space->local_remaining -= bnzi;
3462 
3463     bi[i+1] = bi[i] + bnzi;
3464   }
3465 
3466   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3467 
3468   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3469   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3470   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3471 
3472   /* create symbolic parallel matrix B_mpi */
3473   /*---------------------------------------*/
3474   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3475   if (n==PETSC_DECIDE) {
3476     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3477   } else {
3478     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3479   }
3480   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3481   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3482   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3483 
3484   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3485   B_mpi->assembled     = PETSC_FALSE;
3486   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3487   merge->bi            = bi;
3488   merge->bj            = bj;
3489   merge->buf_ri        = buf_ri;
3490   merge->buf_rj        = buf_rj;
3491   merge->coi           = PETSC_NULL;
3492   merge->coj           = PETSC_NULL;
3493   merge->owners_co     = PETSC_NULL;
3494 
3495   /* attach the supporting struct to B_mpi for reuse */
3496   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3497   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3498   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3499   *mpimat = B_mpi;
3500 
3501   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3502   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3503   PetscFunctionReturn(0);
3504 }
3505 
3506 static PetscEvent logkey_seqstompi = 0;
3507 #undef __FUNCT__
3508 #define __FUNCT__ "MatMerge_SeqsToMPI"
3509 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3510 {
3511   PetscErrorCode   ierr;
3512 
3513   PetscFunctionBegin;
3514   if (!logkey_seqstompi) {
3515     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3516   }
3517   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3518   if (scall == MAT_INITIAL_MATRIX){
3519     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3520   }
3521   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3522   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3523   PetscFunctionReturn(0);
3524 }
3525 static PetscEvent logkey_getlocalmat = 0;
3526 #undef __FUNCT__
3527 #define __FUNCT__ "MatGetLocalMat"
3528 /*@C
3529      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3530 
3531     Not Collective
3532 
3533    Input Parameters:
3534 +    A - the matrix
3535 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3536 
3537    Output Parameter:
3538 .    A_loc - the local sequential matrix generated
3539 
3540     Level: developer
3541 
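   A minimal usage sketch (A_loc is an assumed caller-declared variable):

.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* after the values of A change, the same A_loc can be refreshed with */
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve
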
3542 @*/
3543 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3544 {
3545   PetscErrorCode  ierr;
3546   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3547   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3548   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3549   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3550   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3551   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3552 
3553   PetscFunctionBegin;
3554   if (!logkey_getlocalmat) {
3555     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3556   }
3557   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3558   if (scall == MAT_INITIAL_MATRIX){
3559     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3560     ci[0] = 0;
3561     for (i=0; i<am; i++){
3562       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3563     }
3564     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3565     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3566     k = 0;
3567     for (i=0; i<am; i++) {
3568       ncols_o = bi[i+1] - bi[i];
3569       ncols_d = ai[i+1] - ai[i];
3570       /* off-diagonal portion of A */
3571       for (jo=0; jo<ncols_o; jo++) {
3572         col = cmap[*bj];
3573         if (col >= cstart) break;
3574         cj[k]   = col; bj++;
3575         ca[k++] = *ba++;
3576       }
3577       /* diagonal portion of A */
3578       for (j=0; j<ncols_d; j++) {
3579         cj[k]   = cstart + *aj++;
3580         ca[k++] = *aa++;
3581       }
3582       /* off-diagonal portion of A */
3583       for (j=jo; j<ncols_o; j++) {
3584         cj[k]   = cmap[*bj++];
3585         ca[k++] = *ba++;
3586       }
3587     }
3588     /* put together the new matrix */
3589     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3590     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3591     /* Since these are PETSc arrays, change flags to free them as necessary. */
3592     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3593     mat->free_a  = PETSC_TRUE;
3594     mat->free_ij = PETSC_TRUE;
3595     mat->nonew   = 0;
3596   } else if (scall == MAT_REUSE_MATRIX){
3597     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3598     ci = mat->i; cj = mat->j; ca = mat->a;
3599     for (i=0; i<am; i++) {
3600       /* off-diagonal portion of A */
3601       ncols_o = bi[i+1] - bi[i];
3602       for (jo=0; jo<ncols_o; jo++) {
3603         col = cmap[*bj];
3604         if (col >= cstart) break;
3605         *ca++ = *ba++; bj++;
3606       }
3607       /* diagonal portion of A */
3608       ncols_d = ai[i+1] - ai[i];
3609       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3610       /* off-diagonal portion of A */
3611       for (j=jo; j<ncols_o; j++) {
3612         *ca++ = *ba++; bj++;
3613       }
3614     }
3615   } else {
3616     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3617   }
3618 
3619   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3620   PetscFunctionReturn(0);
3621 }
3622 
3623 static PetscEvent logkey_getlocalmatcondensed = 0;
3624 #undef __FUNCT__
3625 #define __FUNCT__ "MatGetLocalMatCondensed"
3626 /*@C
3627      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
3628 
3629     Not Collective
3630 
3631    Input Parameters:
3632 +    A - the matrix
3633 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3634 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3635 
3636    Output Parameter:
3637 .    A_loc - the local sequential matrix generated
3638 
3639     Level: developer
3640 
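   A minimal usage sketch, letting the routine build the row and column index sets
   (A_loc is an assumed caller-declared variable):

.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
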
3641 @*/
3642 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3643 {
3644   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3645   PetscErrorCode    ierr;
3646   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3647   IS                isrowa,iscola;
3648   Mat               *aloc;
3649 
3650   PetscFunctionBegin;
3651   if (!logkey_getlocalmatcondensed) {
3652     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3653   }
3654   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3655   if (!row){
3656     start = A->rmap.rstart; end = A->rmap.rend;
3657     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3658   } else {
3659     isrowa = *row;
3660   }
3661   if (!col){
3662     start = A->cmap.rstart;
3663     cmap  = a->garray;
3664     nzA   = a->A->cmap.n;
3665     nzB   = a->B->cmap.n;
3666     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3667     ncols = 0;
3668     for (i=0; i<nzB; i++) {
3669       if (cmap[i] < start) idx[ncols++] = cmap[i];
3670       else break;
3671     }
3672     imark = i;
3673     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3674     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3675     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3676     ierr = PetscFree(idx);CHKERRQ(ierr);
3677   } else {
3678     iscola = *col;
3679   }
3680   if (scall != MAT_INITIAL_MATRIX){
3681     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3682     aloc[0] = *A_loc;
3683   }
3684   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3685   *A_loc = aloc[0];
3686   ierr = PetscFree(aloc);CHKERRQ(ierr);
3687   if (!row){
3688     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3689   }
3690   if (!col){
3691     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3692   }
3693   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3694   PetscFunctionReturn(0);
3695 }
3696 
3697 static PetscEvent logkey_GetBrowsOfAcols = 0;
3698 #undef __FUNCT__
3699 #define __FUNCT__ "MatGetBrowsOfAcols"
3700 /*@C
3701     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3702 
3703     Collective on Mat
3704 
3705    Input Parameters:
3706 +    A,B - the matrices in mpiaij format
3707 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3708 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3709 
3710    Output Parameter:
3711 +    rowb, colb - index sets of rows and columns of B to extract
3712 .    brstart - the row index in B_seq at which B's own local rows (the next B->rmap.n rows of B_seq) begin
3713 -    B_seq - the sequential matrix generated
3714 
3715     Level: developer
3716 
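   A rough usage sketch, letting the routine create the index sets on the first call
   (rowb, colb, brstart, and Bseq are assumed caller-declared variables):

.vb
     IS       rowb,colb;
     PetscInt brstart;
     Mat      Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     /* for MAT_REUSE_MATRIX, the same rowb, colb, and Bseq must be passed back in */
.ve
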
3717 @*/
3718 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3719 {
3720   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3721   PetscErrorCode    ierr;
3722   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3723   IS                isrowb,iscolb;
3724   Mat               *bseq;
3725 
3726   PetscFunctionBegin;
3727   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3728     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3729   }
3730   if (!logkey_GetBrowsOfAcols) {
3731     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3732   }
3733   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3734 
3735   if (scall == MAT_INITIAL_MATRIX){
3736     start = A->cmap.rstart;
3737     cmap  = a->garray;
3738     nzA   = a->A->cmap.n;
3739     nzB   = a->B->cmap.n;
3740     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3741     ncols = 0;
3742     for (i=0; i<nzB; i++) {  /* row < local row index */
3743       if (cmap[i] < start) idx[ncols++] = cmap[i];
3744       else break;
3745     }
3746     imark = i;
3747     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3748     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3749     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3750     ierr = PetscFree(idx);CHKERRQ(ierr);
3751     *brstart = imark;
3752     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
3753   } else {
3754     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3755     isrowb = *rowb; iscolb = *colb;
3756     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3757     bseq[0] = *B_seq;
3758   }
3759   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3760   *B_seq = bseq[0];
3761   ierr = PetscFree(bseq);CHKERRQ(ierr);
3762   if (!rowb){
3763     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3764   } else {
3765     *rowb = isrowb;
3766   }
3767   if (!colb){
3768     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3769   } else {
3770     *colb = iscolb;
3771   }
3772   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3773   PetscFunctionReturn(0);
3774 }
3775 
3776 static PetscEvent logkey_GetBrowsOfAocols = 0;
3777 #undef __FUNCT__
3778 #define __FUNCT__ "MatGetBrowsOfAoCols"
3779 /*@C
3780     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3781     of the OFF-DIAGONAL portion of local A
3782 
3783     Collective on Mat
3784 
3785    Input Parameters:
3786 +    A,B - the matrices in mpiaij format
3787 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3788 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3789 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3790 
3791    Output Parameter:
3792 .    B_oth - the sequential matrix generated
3793 
3794     Level: developer
3795 
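   A rough usage sketch (startsj, bufa, and Both are assumed caller-declared variables):

.vb
     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         Both;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
.ve
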
3796 @*/
3797 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3798 {
3799   VecScatter_MPI_General *gen_to,*gen_from;
3800   PetscErrorCode         ierr;
3801   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
3802   Mat_SeqAIJ             *b_oth;
3803   VecScatter             ctx=a->Mvctx;
3804   MPI_Comm               comm=ctx->comm;
3805   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3806   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
3807   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3808   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3809   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
3810   MPI_Status             *sstatus,rstatus;
3811   PetscInt               *cols;
3812   PetscScalar            *vals;
3813   PetscMPIInt            j;
3814 
3815   PetscFunctionBegin;
3816   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3817     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3818   }
3819   if (!logkey_GetBrowsOfAocols) {
3820     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3821   }
3822   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3823   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3824 
3825   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3826   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3827   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3828   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3829   nrecvs   = gen_from->n;
3830   nsends   = gen_to->n;
3831 
3832   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
3833   srow     = gen_to->indices;   /* local row index to be sent */
3834   rstarts  = gen_from->starts;
3835   sstarts  = gen_to->starts;
3836   rprocs   = gen_from->procs;
3837   sprocs   = gen_to->procs;
3838   sstatus  = gen_to->sstatus;
3839 
3840   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3841   if (scall == MAT_INITIAL_MATRIX){
3842     /* i-array */
3843     /*---------*/
3844     /*  post receives */
3845     for (i=0; i<nrecvs; i++){
3846       rowlen = (PetscInt*)rvalues + rstarts[i];
3847       nrows = rstarts[i+1]-rstarts[i];
3848       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3849     }
3850 
3851     /* pack the outgoing message */
3852     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3853     rstartsj = sstartsj + nsends +1;
3854     sstartsj[0] = 0;  rstartsj[0] = 0;
3855     len = 0; /* total length of j or a array to be sent */
3856     k = 0;
3857     for (i=0; i<nsends; i++){
3858       rowlen = (PetscInt*)svalues + sstarts[i];
3859       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3860       for (j=0; j<nrows; j++) {
3861         row = srow[k] + B->rmap.range[rank]; /* global row idx */
3862         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3863         len += rowlen[j];
3864         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3865         k++;
3866       }
3867       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3868        sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3869     }
3870     /* recvs and sends of i-array are completed */
3871     i = nrecvs;
3872     while (i--) {
3873       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3874     }
3875     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3876     /* allocate buffers for sending j and a arrays */
3877     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3878     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3879 
3880     /* create i-array of B_oth */
3881     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3882     b_othi[0] = 0;
3883     len = 0; /* total length of j or a array to be received */
3884     k = 0;
3885     for (i=0; i<nrecvs; i++){
3886       rowlen = (PetscInt*)rvalues + rstarts[i];
3887       nrows = rstarts[i+1]-rstarts[i];
3888       for (j=0; j<nrows; j++) {
3889         b_othi[k+1] = b_othi[k] + rowlen[j];
3890         len += rowlen[j]; k++;
3891       }
3892       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3893     }
3894 
3895     /* allocate space for the j and a arrays of B_oth */
3896     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3897     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3898 
3899     /* j-array */
3900     /*---------*/
3901     /*  post receives of j-array */
3902     for (i=0; i<nrecvs; i++){
3903       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3904       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3905     }
3906     k = 0;
3907     for (i=0; i<nsends; i++){
3908       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3909       bufJ = bufj+sstartsj[i];
3910       for (j=0; j<nrows; j++) {
3911         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3912         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3913         for (l=0; l<ncols; l++){
3914           *bufJ++ = cols[l];
3915         }
3916         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3917       }
3918       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3919     }
3920 
3921     /* recvs and sends of j-array are completed */
3922     i = nrecvs;
3923     while (i--) {
3924       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3925     }
3926     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3927   } else if (scall == MAT_REUSE_MATRIX){
3928     sstartsj = *startsj;
3929     rstartsj = sstartsj + nsends +1;
3930     bufa     = *bufa_ptr;
3931     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3932     b_otha   = b_oth->a;
3933   } else {
3934     SETERRQ(PETSC_ERR_ARG_WRONG, "Invalid MatReuse value; must be MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
3935   }
3936 
3937   /* a-array */
3938   /*---------*/
3939   /*  post receives of a-array */
3940   for (i=0; i<nrecvs; i++){
3941     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3942     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3943   }
3944   k = 0;
3945   for (i=0; i<nsends; i++){
3946     nrows = sstarts[i+1]-sstarts[i];
3947     bufA = bufa+sstartsj[i];
3948     for (j=0; j<nrows; j++) {
3949       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3950       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3951       for (l=0; l<ncols; l++){
3952         *bufA++ = vals[l];
3953       }
3954       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3955 
3956     }
3957     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3958   }
3959   /* recvs and sends of a-array are completed */
3960   i = nrecvs;
3961   while (i--) {
3962     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3963   }
3964   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3965   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
3966 
3967   if (scall == MAT_INITIAL_MATRIX){
3968     /* put together the new matrix */
3969     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3970 
3971     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3972     /* Since these are PETSc arrays, change flags to free them as necessary. */
3973     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
3974     b_oth->free_a  = PETSC_TRUE;
3975     b_oth->free_ij = PETSC_TRUE;
3976     b_oth->nonew   = 0;
3977 
3978     ierr = PetscFree(bufj);CHKERRQ(ierr);
3979     if (!startsj || !bufa_ptr){
3980       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3981       ierr = PetscFree(bufa);CHKERRQ(ierr);
3982     } else {
3983       *startsj  = sstartsj;
3984       *bufa_ptr = bufa;
3985     }
3986   }
3987   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3988 
3989   PetscFunctionReturn(0);
3990 }
3991 
3992 #undef __FUNCT__
3993 #define __FUNCT__ "MatGetCommunicationStructs"
3994 /*@C
3995   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
3996 
3997   Not Collective
3998 
3999   Input Parameters:
4000 . A - The matrix in mpiaij format
4001 
4002   Output Parameter:
4003 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4004 . colmap - A map from global column index to local index into lvec
4005 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4006 
4007   Level: developer
4008 
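  A minimal usage sketch, shown for a build without PETSC_USE_CTABLE (lvec, colmap,
  and Mvctx are assumed caller-declared variables):

.vb
     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
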
4009 @*/
4010 #if defined (PETSC_USE_CTABLE)
4011 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4012 #else
4013 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4014 #endif
4015 {
4016   Mat_MPIAIJ *a;
4017 
4018   PetscFunctionBegin;
4019   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4020   PetscValidPointer(lvec, 2)
4021   PetscValidPointer(colmap, 3)
4022   PetscValidPointer(multScatter, 4)
4023   a = (Mat_MPIAIJ *) A->data;
4024   if (lvec) *lvec = a->lvec;
4025   if (colmap) *colmap = a->colmap;
4026   if (multScatter) *multScatter = a->Mvctx;
4027   PetscFunctionReturn(0);
4028 }
4029 
4030 EXTERN_C_BEGIN
4031 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4032 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4033 EXTERN_C_END
4034 
4035 /*MC
4036    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4037 
4038    Options Database Keys:
4039 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4040 
4041   Level: beginner
4042 
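  A rough creation sketch using this type explicitly (comm, A, and the size and
  preallocation arguments are assumed to be supplied by the caller):

.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
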
4043 .seealso: MatCreateMPIAIJ
4044 M*/
4045 
4046 EXTERN_C_BEGIN
4047 #undef __FUNCT__
4048 #define __FUNCT__ "MatCreate_MPIAIJ"
4049 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4050 {
4051   Mat_MPIAIJ     *b;
4052   PetscErrorCode ierr;
4053   PetscMPIInt    size;
4054 
4055   PetscFunctionBegin;
4056   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4057 
4058   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
4059   B->data         = (void*)b;
4060   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4061   B->factor       = 0;
4062   B->rmap.bs      = 1;
4063   B->assembled    = PETSC_FALSE;
4064   B->mapping      = 0;
4065 
4066   B->insertmode      = NOT_SET_VALUES;
4067   b->size            = size;
4068   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4069 
4070   /* build cache for off array entries formed */
4071   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4072   b->donotstash  = PETSC_FALSE;
4073   b->colmap      = 0;
4074   b->garray      = 0;
4075   b->roworiented = PETSC_TRUE;
4076 
4077   /* stuff used for matrix vector multiply */
4078   b->lvec      = PETSC_NULL;
4079   b->Mvctx     = PETSC_NULL;
4080 
4081   /* stuff for MatGetRow() */
4082   b->rowindices   = 0;
4083   b->rowvalues    = 0;
4084   b->getrowactive = PETSC_FALSE;
4085 
4086 
4087   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4088                                      "MatStoreValues_MPIAIJ",
4089                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4090   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4091                                      "MatRetrieveValues_MPIAIJ",
4092                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4093   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4094 				     "MatGetDiagonalBlock_MPIAIJ",
4095                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4096   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4097 				     "MatIsTranspose_MPIAIJ",
4098 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4099   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4100 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4101 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4102   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4103 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4104 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4105   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4106 				     "MatDiagonalScaleLocal_MPIAIJ",
4107 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4108   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4109                                      "MatConvert_MPIAIJ_MPICSRPERM",
4110                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4111   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4112                                      "MatConvert_MPIAIJ_MPICRL",
4113                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4114   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4115   PetscFunctionReturn(0);
4116 }
4117 EXTERN_C_END
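
/*
   The PetscObjectComposeFunctionDynamic() calls above attach implementation
   routines to the matrix under string names; generic dispatchers retrieve them
   with PetscObjectQueryFunction() and call them only when the matrix actually
   provides them.  A hedged sketch of that lookup (the helper name is made up;
   the function-pointer signature is assumed to match MatMPIAIJSetPreallocation()):
*/
static PetscErrorCode MatExampleQueryComposed_Private(Mat mat,PetscInt d_nz,PetscInt o_nz)
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) { /* only matrix types that composed this name (e.g. MATMPIAIJ above) take this branch */
    ierr = (*f)(mat,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}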
4118 
4119 /*
4120     Special version for direct calls from Fortran
4121 */
4122 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4123 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4124 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4125 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4126 #endif
4127 
4128 /* Redefine these macros so they can be used in a function that returns void */
4129 #undef CHKERRQ
4130 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4131 #undef SETERRQ2
4132 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4133 #undef SETERRQ
4134 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
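
/*
   Why the redefinition above: the Fortran-callable routine below returns void,
   so it cannot hand an error code back the way CHKERRQ() normally does; after
   the redefinition any error simply aborts on the matrix communicator.  An
   illustrative (unused) sketch of the same pattern written out explicitly:
*/
static void MatExampleVoidErrorHandling_Private(Mat mat)
{
  PetscErrorCode ierr;

  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRABORT(mat->comm,ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRABORT(mat->comm,ierr);
}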
4135 
4136 EXTERN_C_BEGIN
4137 #undef __FUNCT__
4138 #define __FUNCT__ "matsetvaluesmpiaij_"
4139 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4140 {
4141   Mat            mat = *mmat;
4142   PetscInt       m = *mm, n = *mn;
4143   InsertMode     addv = *maddv;
4144   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4145   PetscScalar    value;
4146   PetscErrorCode ierr;
4147 
4148   MatPreallocated(mat);
4149   if (mat->insertmode == NOT_SET_VALUES) {
4150     mat->insertmode = addv;
4151   }
4152 #if defined(PETSC_USE_DEBUG)
4153   else if (mat->insertmode != addv) {
4154     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4155   }
4156 #endif
4157   {
4158   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4159   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4160   PetscTruth     roworiented = aij->roworiented;
4161 
4162   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
4163   Mat            A = aij->A;
4164   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4165   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4166   PetscScalar    *aa = a->a;
4167   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4168   Mat            B = aij->B;
4169   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4170   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4171   PetscScalar    *ba = b->a;
4172 
4173   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4174   PetscInt       nonew = a->nonew;
4175   PetscScalar    *ap1,*ap2;
4176 
4177   PetscFunctionBegin;
4178   for (i=0; i<m; i++) {
4179     if (im[i] < 0) continue;
4180 #if defined(PETSC_USE_DEBUG)
4181     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4182 #endif
4183     if (im[i] >= rstart && im[i] < rend) {
4184       row      = im[i] - rstart;
4185       lastcol1 = -1;
4186       rp1      = aj + ai[row];
4187       ap1      = aa + ai[row];
4188       rmax1    = aimax[row];
4189       nrow1    = ailen[row];
4190       low1     = 0;
4191       high1    = nrow1;
4192       lastcol2 = -1;
4193       rp2      = bj + bi[row];
4194       ap2      = ba + bi[row];
4195       rmax2    = bimax[row];
4196       nrow2    = bilen[row];
4197       low2     = 0;
4198       high2    = nrow2;
4199 
4200       for (j=0; j<n; j++) {
4201         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4202         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4203         if (in[j] >= cstart && in[j] < cend){
4204           col = in[j] - cstart;
4205           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4206         } else if (in[j] < 0) continue;
4207 #if defined(PETSC_USE_DEBUG)
4208         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4209 #endif
4210         else {
4211           if (mat->was_assembled) {
4212             if (!aij->colmap) {
4213               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4214             }
4215 #if defined (PETSC_USE_CTABLE)
4216             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4217             col--;
4218 #else
4219             col = aij->colmap[in[j]] - 1;
4220 #endif
4221             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4222               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4223               col =  in[j];
4224               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4225               B = aij->B;
4226               b = (Mat_SeqAIJ*)B->data;
4227               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4228               ba       = b->a; /* refresh ba before computing ap2: DisAssemble_MPIAIJ() rebuilt aij->B */
4229               rp2      = bj + bi[row];
4230               ap2      = ba + bi[row];
4231               rmax2    = bimax[row];
4232               nrow2    = bilen[row];
4233               low2     = 0;
4234               high2    = nrow2;
4235               bm       = aij->B->rmap.n;
4236             }
4237           } else col = in[j];
4238           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4239         }
4240       }
4241     } else {
4242       if (!aij->donotstash) {
4243         if (roworiented) {
4244           if (ignorezeroentries && v[i*n] == 0.0) continue;
4245           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4246         } else {
4247           if (ignorezeroentries && v[i] == 0.0) continue;
4248           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4249         }
4250       }
4251     }
4252   }}
4253   PetscFunctionReturnVoid();
4254 }
4255 EXTERN_C_END
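
/*
   Values targeted at rows owned by another process are not inserted above;
   MatStashValuesRow/Col_Private() queue them in mat->stash and they are
   exchanged during MatAssemblyBegin()/MatAssemblyEnd().  A caller-side sketch
   (illustrative only; the helper name is made up, and CHKERRQ() in this part
   of the file still carries the abort semantics defined above, hence the void
   return):
*/
static void MatExampleStashOffProcessEntry_Private(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,row,col;
  PetscScalar    val = 2.0;

  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  if (rstart > 0) {                       /* pick a row owned by the previous process, if there is one */
    row  = rstart - 1;
    col  = row;
    ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr); /* stashed, not yet inserted */
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);          /* stashed values are communicated here */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
}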
4256