xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 38f2d2fdb3b6f522a3102c6eb796cebecf3224c0)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is defined this mapping is
10 scalable, at a slightly higher hash-table lookup cost; without it each process
11 stores an order-N integer array, which is fast to access but not scalable.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
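
/*
   For illustration only (a sketch, not additional code in this file): once the
   map has been built, the local off-diagonal index of a global column gcol is
   recovered with the same two code paths used by MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below; a result of -1 means gcol does not occur in the
   off-diagonal part.

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/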
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find the number of entries in this row that lie to the left of the diagonal block */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
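
/*
   Note on the layout assumed above (added for clarity): v[] must contain the
   nonzeros of the global row in increasing global column order, i.e. first the
   off-diagonal entries with columns to the left of the diagonal block, then the
   diagonal-block entries, then the remaining off-diagonal entries; the three
   PetscMemcpy() calls copy exactly those three contiguous pieces.
*/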
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = a->ignorezeroentries;
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199             col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
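
/*
   Typical user-level assembly that exercises the routine above (a hedged
   sketch using only standard PETSc calls, assuming an already-created MATMPIAIJ
   matrix mat; error handling abbreviated).  Values whose rows are owned locally
   go directly into the diagonal (aij->A) or off-diagonal (aij->B) blocks;
   values for rows owned elsewhere are kept in mat->stash until
   MatAssemblyBegin()/MatAssemblyEnd() ship them to their owners.

     PetscInt    i = 0, j = 0;
     PetscScalar val = 1.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/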
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col--;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processes are using the same insert mode (either INSERT_VALUES or ADD_VALUES) */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312 {
313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
315   PetscErrorCode ierr;
316   PetscMPIInt    n;
317   PetscInt       i,j,rstart,ncols,flg;
318   PetscInt       *row,*col,other_disassembled;
319   PetscScalar    *val;
320   InsertMode     addv = mat->insertmode;
321 
322   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
323   PetscFunctionBegin;
324   if (!aij->donotstash) {
325     while (1) {
326       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
327       if (!flg) break;
328 
329       for (i=0; i<n;) {
330         /* Now identify the consecutive vals belonging to the same row */
331         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332         if (j < n) ncols = j-i;
333         else       ncols = n-i;
334         /* Now assemble all these values with a single function call */
335         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
336         i = j;
337       }
338     }
339     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
340   }
341   a->compressedrow.use     = PETSC_FALSE;
342   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
344 
345   /* determine if any process has disassembled; if so we must
346      also disassemble ourselves, in order that we may reassemble. */
347   /*
348      if the nonzero structure of submatrix B cannot change then we know that
349      no process disassembled, so we can skip this step
350   */
351   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
352     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
353     if (mat->was_assembled && !other_disassembled) {
354       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
355     }
356   }
357   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
359   }
360   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
361   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
363   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
364 
365   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
366   aij->rowvalues = 0;
367 
368   /* used by MatAXPY() */
369   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
370   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
371 
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = A->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_USE_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_USE_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_USE_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length*/
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for the values going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* convert the received global row numbers into local row numbers */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal, we use separate
502      code so that MatSetValues() is not called for each diagonal entry,
503      which would allocate new memory repeatedly and slow things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
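
/*
   Communication pattern used above (summary added for clarity): each process
   determines the owner of every row index it was given, informs the other
   processes of the number of messages and their maximum length through
   PetscMaxSum(), exchanges the non-local indices with MPI_Isend()/MPI_Irecv(),
   converts the received global indices to local ones, and finally zeros its
   own local rows by calling MatZeroRows() on the diagonal and off-diagonal
   blocks.
*/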
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
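
/*
   The product above is computed in the standard overlapped form
        y = A_d * x_local + A_o * x_ghost,
   where A_d = a->A is the diagonal block, A_o = a->B the off-diagonal block,
   and x_ghost = a->lvec holds the needed off-process entries of x; the scatter
   that fills a->lvec is started before, and finished after, the local multiply
   with a->A, so communication overlaps computation.
*/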
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscTruth     merged;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
585   /* do nondiagonal part */
586   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
587   if (!merged) {
588     /* send it on its way */
589     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
590     /* do local part */
591     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
592     /* receive remote parts: note this assumes the values are not actually */
593     /* added into yy until the VecScatterEnd() below */
594     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
595   } else {
596     /* do local part */
597     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
598     /* send it on its way */
599     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
600     /* the values were actually received in the Begin(), but VecScatterEnd() must still be called even though it is a no-op here */
601     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
602   }
603   PetscFunctionReturn(0);
604 }
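
/*
   For the transpose product the roles are reversed:
        y = A_d^T * x  +  (reverse scatter-add of A_o^T * x to the owning processes).
   When the scatter context is "merged", the reverse scatter already adds the
   remote contributions during VecScatterBegin(), so the local multiply is done
   first; otherwise the scatter is started first (overlapping communication
   with the local multiply) and VecScatterEnd() performs the addition into yy,
   as the two branches above show.
*/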
605 
606 EXTERN_C_BEGIN
607 #undef __FUNCT__
608 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610 {
611   MPI_Comm       comm;
612   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614   IS             Me,Notme;
615   PetscErrorCode ierr;
616   PetscInt       M,N,first,last,*notme,i;
617   PetscMPIInt    size;
618 
619   PetscFunctionBegin;
620 
621   /* Easy test: symmetric diagonal block */
622   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
624   if (!*f) PetscFunctionReturn(0);
625   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
626   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
627   if (size == 1) PetscFunctionReturn(0);
628 
629   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
631   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
632   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
633   for (i=0; i<first; i++) notme[i] = i;
634   for (i=last; i<M; i++) notme[i-last+first] = i;
635   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
636   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
637   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
638   Aoff = Aoffs[0];
639   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
640   Boff = Boffs[0];
641   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
642   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
644   ierr = ISDestroy(Me);CHKERRQ(ierr);
645   ierr = ISDestroy(Notme);CHKERRQ(ierr);
646 
647   PetscFunctionReturn(0);
648 }
649 EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatDestroy_MPIAIJ"
705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706 {
707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711 #if defined(PETSC_USE_LOG)
712   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713 #endif
714   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
715   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
717 #if defined (PETSC_USE_CTABLE)
718   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
719 #else
720   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
721 #endif
722   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
723   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
724   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
725   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
726   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* process 0 needs as much space as the largest number of nonzeros on any process */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869 {
870   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,iascii,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (iascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo    info;
885       PetscTruth inodes;
886 
887       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
888       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
890       if (!inodes) {
891         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
893       } else {
894         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
896       }
897       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
899       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
901       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
902       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
903       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
904       PetscFunctionReturn(0);
905     } else if (format == PETSC_VIEWER_ASCII_INFO) {
906       PetscInt   inodecount,inodelimit,*inodes;
907       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
908       if (inodes) {
909         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
910       } else {
911         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
912       }
913       PetscFunctionReturn(0);
914     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
915       PetscFunctionReturn(0);
916     }
917   } else if (isbinary) {
918     if (size == 1) {
919       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
920       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
921     } else {
922       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
923     }
924     PetscFunctionReturn(0);
925   } else if (isdraw) {
926     PetscDraw  draw;
927     PetscTruth isnull;
928     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
929     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
930   }
931 
932   if (size == 1) {
933     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
934     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
935   } else {
936     /* assemble the entire matrix onto first processor. */
937     Mat         A;
938     Mat_SeqAIJ  *Aloc;
939     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
940     PetscScalar *a;
941 
942     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
943     if (!rank) {
944       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
945     } else {
946       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
947     }
948     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
949     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
950     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
951     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
952 
953     /* copy over the A part */
954     Aloc = (Mat_SeqAIJ*)aij->A->data;
955     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
956     row = mat->rmap.rstart;
957     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
958     for (i=0; i<m; i++) {
959       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
960       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
961     }
962     aj = Aloc->j;
963     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
964 
965     /* copy over the B part */
966     Aloc = (Mat_SeqAIJ*)aij->B->data;
967     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
968     row  = mat->rmap.rstart;
969     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
970     ct   = cols;
971     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
972     for (i=0; i<m; i++) {
973       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
974       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
975     }
976     ierr = PetscFree(ct);CHKERRQ(ierr);
977     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
979     /*
980        Everyone has to call to draw the matrix since the graphics waits are
981        synchronized across all processors that share the PetscDraw object
982     */
983     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
984     if (!rank) {
985       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
986       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
987     }
988     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
989     ierr = MatDestroy(A);CHKERRQ(ierr);
990   }
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatView_MPIAIJ"
996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997 {
998   PetscErrorCode ierr;
999   PetscTruth     iascii,isdraw,issocket,isbinary;
1000 
1001   PetscFunctionBegin;
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1003   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1005   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1006   if (iascii || isdraw || isbinary || issocket) {
1007     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1008   } else {
1009     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017 {
1018   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec            bb1;
1021 
1022   PetscFunctionBegin;
1023   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1024 
1025   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1026 
1027   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1030       its--;
1031     }
1032 
1033     while (its--) {
1034       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036 
1037       /* update rhs: bb1 = bb - B*x */
1038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1040 
1041       /* local sweep */
1042       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043       CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060       CHKERRQ(ierr);
1061     }
1062   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063     if (flag & SOR_ZERO_INITIAL_GUESS) {
1064       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1065       its--;
1066     }
1067     while (its--) {
1068       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else {
1080     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081   }
1082 
1083   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatPermute_MPIAIJ"
1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090 {
1091   MPI_Comm       comm,pcomm;
1092   PetscInt       first,local_size,nrows,*rows;
1093   int            ntids;
1094   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1099   /* make a collective version of 'rowp' */
1100   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1101   if (pcomm==comm) {
1102     crowp = rowp;
1103   } else {
1104     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1105     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1107     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1108   }
1109   /* collect the global row permutation and invert it */
1110   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1111   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1112   if (pcomm!=comm) {
1113     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1114   }
1115   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1116   /* get the local target indices */
1117   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1121   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1122   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1123   /* the column permutation is simpler:
1124      make a local version of 'colp' and invert it */
1125   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1126   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1127   if (ntids==1) {
1128     lcolp = colp;
1129   } else {
1130     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1131     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1132     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1133   }
1134   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1135   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1136   if (ntids>1) {
1137     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1138     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1139   }
1140   /* now we just get the submatrix */
1141   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1142   /* clean up */
1143   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1144   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153   Mat            A = mat->A,B = mat->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = 1.0;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   }
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   info->rows_global       = (double)matin->rmap.N;
1190   info->columns_global    = (double)matin->cmap.N;
1191   info->rows_local        = (double)matin->rmap.n;
1192   info->columns_local     = (double)matin->cmap.N;
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatSetOption_MPIAIJ"
1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   switch (op) {
1206   case MAT_NO_NEW_NONZERO_LOCATIONS:
1207   case MAT_YES_NEW_NONZERO_LOCATIONS:
1208   case MAT_COLUMNS_UNSORTED:
1209   case MAT_COLUMNS_SORTED:
1210   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1211   case MAT_KEEP_ZEROED_ROWS:
1212   case MAT_NEW_NONZERO_LOCATION_ERR:
1213   case MAT_USE_INODES:
1214   case MAT_DO_NOT_USE_INODES:
1215   case MAT_IGNORE_ZERO_ENTRIES:
1216   case MAT_DO_NOT_IGNORE_ZERO_ENTRIES:
1217     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1218     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1219     break;
1220   case MAT_ROW_ORIENTED:
1221     a->roworiented = PETSC_TRUE;
1222     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1223     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1224     break;
1225   case MAT_ROWS_SORTED:
1226   case MAT_ROWS_UNSORTED:
1227   case MAT_YES_NEW_DIAGONALS:
1228     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1229     break;
1230   case MAT_COLUMN_ORIENTED:
1231     a->roworiented = PETSC_FALSE;
1232     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1233     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1234     break;
1235   case MAT_IGNORE_OFF_PROC_ENTRIES:
1236     a->donotstash = PETSC_TRUE;
1237     break;
1238   case MAT_NO_NEW_DIAGONALS:
1239     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1240   case MAT_SYMMETRIC:
1241     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1242     break;
1243   case MAT_STRUCTURALLY_SYMMETRIC:
1244   case MAT_HERMITIAN:
1245   case MAT_SYMMETRY_ETERNAL:
1246     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1247     break;
1248   case MAT_NOT_SYMMETRIC:
1249   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1250   case MAT_NOT_HERMITIAN:
1251   case MAT_NOT_SYMMETRY_ETERNAL:
1252     break;
1253   default:
1254     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatGetRow_MPIAIJ"
1261 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1262 {
1263   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1264   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1265   PetscErrorCode ierr;
1266   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1267   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1268   PetscInt       *cmap,*idx_p;
1269 
1270   PetscFunctionBegin;
1271   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1272   mat->getrowactive = PETSC_TRUE;
1273 
1274   if (!mat->rowvalues && (idx || v)) {
1275     /*
1276         allocate enough space to hold information from the longest row.
1277     */
1278     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1279     PetscInt     max = 1,tmp;
1280     for (i=0; i<matin->rmap.n; i++) {
1281       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1282       if (max < tmp) { max = tmp; }
1283     }
1284     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1285     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1286   }
1287 
1288   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1289   lrow = row - rstart;
1290 
1291   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1292   if (!v)   {pvA = 0; pvB = 0;}
1293   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1294   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1295   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1296   nztot = nzA + nzB;
1297 
1298   cmap  = mat->garray;
1299   if (v  || idx) {
1300     if (nztot) {
1301       /* Sort by increasing column numbers, assuming A and B already sorted */
1302       PetscInt imark = -1;
1303       if (v) {
1304         *v = v_p = mat->rowvalues;
1305         for (i=0; i<nzB; i++) {
1306           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1307           else break;
1308         }
1309         imark = i;
1310         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1311         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1312       }
1313       if (idx) {
1314         *idx = idx_p = mat->rowindices;
1315         if (imark > -1) {
1316           for (i=0; i<imark; i++) {
1317             idx_p[i] = cmap[cworkB[i]];
1318           }
1319         } else {
1320           for (i=0; i<nzB; i++) {
1321             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1322             else break;
1323           }
1324           imark = i;
1325         }
1326         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1327         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1328       }
1329     } else {
1330       if (idx) *idx = 0;
1331       if (v)   *v   = 0;
1332     }
1333   }
1334   *nz = nztot;
1335   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1336   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1342 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1343 {
1344   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1345 
1346   PetscFunctionBegin;
1347   if (!aij->getrowactive) {
1348     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1349   }
1350   aij->getrowactive = PETSC_FALSE;
1351   PetscFunctionReturn(0);
1352 }
1353 
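/*
   A minimal user-level sketch of the get/restore protocol implemented by the two
   routines above (illustrative only; the matrix A and the loop are assumptions, not
   taken from this file). Only locally owned rows may be requested, and every
   MatGetRow() must be paired with a MatRestoreRow() before the next row is fetched.

     PetscInt    row,rstart,rend,ncols,*cols;
     PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       /* ... use ncols, cols[], vals[] (read only) ... */
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
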
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatNorm_MPIAIJ"
1356 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1357 {
1358   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1359   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1360   PetscErrorCode ierr;
1361   PetscInt       i,j,cstart = mat->cmap.rstart;
1362   PetscReal      sum = 0.0;
1363   PetscScalar    *v;
1364 
1365   PetscFunctionBegin;
1366   if (aij->size == 1) {
1367     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1368   } else {
1369     if (type == NORM_FROBENIUS) {
1370       v = amat->a;
1371       for (i=0; i<amat->nz; i++) {
1372 #if defined(PETSC_USE_COMPLEX)
1373         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1374 #else
1375         sum += (*v)*(*v); v++;
1376 #endif
1377       }
1378       v = bmat->a;
1379       for (i=0; i<bmat->nz; i++) {
1380 #if defined(PETSC_USE_COMPLEX)
1381         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1382 #else
1383         sum += (*v)*(*v); v++;
1384 #endif
1385       }
1386       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1387       *norm = sqrt(*norm);
1388     } else if (type == NORM_1) { /* max column norm */
1389       PetscReal *tmp,*tmp2;
1390       PetscInt    *jj,*garray = aij->garray;
1391       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1392       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1393       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1394       *norm = 0.0;
1395       v = amat->a; jj = amat->j;
1396       for (j=0; j<amat->nz; j++) {
1397         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1398       }
1399       v = bmat->a; jj = bmat->j;
1400       for (j=0; j<bmat->nz; j++) {
1401         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1402       }
1403       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1404       for (j=0; j<mat->cmap.N; j++) {
1405         if (tmp2[j] > *norm) *norm = tmp2[j];
1406       }
1407       ierr = PetscFree(tmp);CHKERRQ(ierr);
1408       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1409     } else if (type == NORM_INFINITY) { /* max row norm */
1410       PetscReal ntemp = 0.0;
1411       for (j=0; j<aij->A->rmap.n; j++) {
1412         v = amat->a + amat->i[j];
1413         sum = 0.0;
1414         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1415           sum += PetscAbsScalar(*v); v++;
1416         }
1417         v = bmat->a + bmat->i[j];
1418         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1419           sum += PetscAbsScalar(*v); v++;
1420         }
1421         if (sum > ntemp) ntemp = sum;
1422       }
1423       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1424     } else {
1425       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1426     }
1427   }
1428   PetscFunctionReturn(0);
1429 }
1430 
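/*
   Summary of how the reductions above assemble each requested norm (restating the
   code, not adding behavior): every process accumulates over its diagonal (A) and
   off-diagonal (B) blocks, then one MPI_Allreduce combines the partial results.

     NORM_FROBENIUS:  ||A||_F   = sqrt( MPI_SUM over processes of sum_ij |a_ij|^2 )
     NORM_1:          ||A||_1   = max_j ( MPI_SUM over processes of sum_i |a_ij| )
     NORM_INFINITY:   ||A||_inf = MPI_MAX over processes of ( max_i sum_j |a_ij| )
*/
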
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatTranspose_MPIAIJ"
1433 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1434 {
1435   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1436   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1437   PetscErrorCode ierr;
1438   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1439   PetscInt       cstart=A->cmap.rstart,ncol;
1440   Mat            B;
1441   PetscScalar    *array;
1442 
1443   PetscFunctionBegin;
1444   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1445 
1446   /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1447   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1448   ai = Aloc->i; aj = Aloc->j;
1449   bi = Bloc->i; bj = Bloc->j;
1450   ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1451   cols = d_nnz + na + 1; /* work space to be used by B part */
1452   ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1453   for (i=0; i<ai[ma]; i++){
1454     d_nnz[aj[i]] ++;
1455     aj[i] += cstart; /* global col index to be used by MatSetValues() */
1456   }
1457 
1458   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1459   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1460   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1461   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1462 
1463   /* copy over the A part */
1464   array = Aloc->a;
1465   row = A->rmap.rstart;
1466   for (i=0; i<ma; i++) {
1467     ncol = ai[i+1]-ai[i];
1468     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1469     row++; array += ncol; aj += ncol;
1470   }
1471   aj = Aloc->j;
1472   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1473 
1474   /* copy over the B part */
1475   array = Bloc->a;
1476   row = A->rmap.rstart;
1477   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1478   for (i=0; i<mb; i++) {
1479     ncol = bi[i+1]-bi[i];
1480     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1481     row++; array += ncol; cols += ncol;
1482   }
1483   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1484   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1485   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1486   if (matout) {
1487     *matout = B;
1488   } else {
1489     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1490   }
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1496 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1497 {
1498   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1499   Mat            a = aij->A,b = aij->B;
1500   PetscErrorCode ierr;
1501   PetscInt       s1,s2,s3;
1502 
1503   PetscFunctionBegin;
1504   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1505   if (rr) {
1506     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1507     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1508     /* Overlap communication with computation. */
1509     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1510   }
1511   if (ll) {
1512     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1513     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1514     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1515   }
1516   /* scale the diagonal block */
1517   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1518 
1519   if (rr) {
1520     /* Do a scatter end and then right scale the off-diagonal block */
1521     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1523   }
1524 
1525   PetscFunctionReturn(0);
1526 }
1527 
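/*
   The routine above uses the standard PETSc idiom for overlapping communication with
   computation: start the scatter of the right-scaling vector, do the work that needs
   only local data, then finish the scatter and do the part that needs the ghosted
   values. A generic sketch of the pattern (ctx, x and xlocal are placeholders, not
   names from this file):

     ierr = VecScatterBegin(ctx,x,xlocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ...   work that does not need xlocal   ...
     ierr = VecScatterEnd(ctx,x,xlocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ...   work that needs xlocal   ...
*/
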
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1530 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1531 {
1532   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1533   PetscErrorCode ierr;
1534 
1535   PetscFunctionBegin;
1536   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1537   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1542 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1543 {
1544   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1545   PetscErrorCode ierr;
1546 
1547   PetscFunctionBegin;
1548   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1549   PetscFunctionReturn(0);
1550 }
1551 
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MatEqual_MPIAIJ"
1554 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1555 {
1556   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1557   Mat            a,b,c,d;
1558   PetscTruth     flg;
1559   PetscErrorCode ierr;
1560 
1561   PetscFunctionBegin;
1562   a = matA->A; b = matA->B;
1563   c = matB->A; d = matB->B;
1564 
1565   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1566   if (flg) {
1567     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1568   }
1569   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatCopy_MPIAIJ"
1575 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1576 {
1577   PetscErrorCode ierr;
1578   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1579   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1580 
1581   PetscFunctionBegin;
1582   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1583   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1584     /* Because of the column compression in the off-processor part of the matrix a->B,
1585        the number of columns in a->B and b->B may differ, hence we cannot call
1586        MatCopy() directly on the two parts. If need be, a copy more efficient than
1587        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1588        and then copying the submatrices */
1589     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1590   } else {
1591     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1592     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1593   }
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 #undef __FUNCT__
1598 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1599 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1600 {
1601   PetscErrorCode ierr;
1602 
1603   PetscFunctionBegin;
1604   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 #include "petscblaslapack.h"
1609 #undef __FUNCT__
1610 #define __FUNCT__ "MatAXPY_MPIAIJ"
1611 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1612 {
1613   PetscErrorCode ierr;
1614   PetscInt       i;
1615   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1616   PetscBLASInt   bnz,one=1;
1617   Mat_SeqAIJ     *x,*y;
1618 
1619   PetscFunctionBegin;
1620   if (str == SAME_NONZERO_PATTERN) {
1621     PetscScalar alpha = a;
1622     x = (Mat_SeqAIJ *)xx->A->data;
1623     y = (Mat_SeqAIJ *)yy->A->data;
1624     bnz = (PetscBLASInt)x->nz;
1625     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1626     x = (Mat_SeqAIJ *)xx->B->data;
1627     y = (Mat_SeqAIJ *)yy->B->data;
1628     bnz = (PetscBLASInt)x->nz;
1629     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1630   } else if (str == SUBSET_NONZERO_PATTERN) {
1631     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1632 
1633     x = (Mat_SeqAIJ *)xx->B->data;
1634     y = (Mat_SeqAIJ *)yy->B->data;
1635     if (y->xtoy && y->XtoY != xx->B) {
1636       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1637       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1638     }
1639     if (!y->xtoy) { /* get xtoy */
1640       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1641       y->XtoY = xx->B;
1642       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1643     }
1644     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1645   } else {
1646     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1647   }
1648   PetscFunctionReturn(0);
1649 }
1650 
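/*
   For SAME_NONZERO_PATTERN the routine above reduces Y = Y + a*X to two flat vector
   updates, y->a[k] += a * x->a[k] for k = 0..nz-1, one for the diagonal block and one
   for the off-diagonal block, carried out with BLAS axpy on the stored value arrays;
   no index manipulation is needed because X and Y share exactly the same sparsity
   pattern.
*/
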
1651 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1652 
1653 #undef __FUNCT__
1654 #define __FUNCT__ "MatConjugate_MPIAIJ"
1655 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1656 {
1657 #if defined(PETSC_USE_COMPLEX)
1658   PetscErrorCode ierr;
1659   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1660 
1661   PetscFunctionBegin;
1662   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1663   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1664 #else
1665   PetscFunctionBegin;
1666 #endif
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatRealPart_MPIAIJ"
1672 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1673 {
1674   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1675   PetscErrorCode ierr;
1676 
1677   PetscFunctionBegin;
1678   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1679   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1685 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1686 {
1687   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1688   PetscErrorCode ierr;
1689 
1690   PetscFunctionBegin;
1691   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1692   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1693   PetscFunctionReturn(0);
1694 }
1695 
1696 #ifdef PETSC_HAVE_PBGL
1697 
1698 #include <boost/parallel/mpi/bsp_process_group.hpp>
1699 #include <boost/graph/distributed/ilu_default_graph.hpp>
1700 #include <boost/graph/distributed/ilu_0_block.hpp>
1701 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1702 #include <boost/graph/distributed/petsc/interface.hpp>
1703 #include <boost/multi_array.hpp>
1704 #include <boost/parallel/distributed_property_map.hpp>
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1708 /*
1709   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1710 */
1711 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1712 {
1713   namespace petsc = boost::distributed::petsc;
1714 
1715   namespace graph_dist = boost::graph::distributed;
1716   using boost::graph::distributed::ilu_default::process_group_type;
1717   using boost::graph::ilu_permuted;
1718 
1719   PetscTruth      row_identity, col_identity;
1720   PetscContainer  c;
1721   PetscInt        m, n, M, N;
1722   PetscErrorCode  ierr;
1723 
1724   PetscFunctionBegin;
1725   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1726   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1727   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1728   if (!row_identity || !col_identity) {
1729     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1730   }
1731 
1732   process_group_type pg;
1733   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1734   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1735   lgraph_type&   level_graph = *lgraph_p;
1736   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1737 
1738   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1739   ilu_permuted(level_graph);
1740 
1741   /* put together the new matrix */
1742   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1743   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1744   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1745   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1746   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1747   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1748   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1749   (*fact)->factor = FACTOR_LU;
1750 
1751   ierr = PetscContainerCreate(A->comm, &c);CHKERRQ(ierr);
1752   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1753   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1754   PetscFunctionReturn(0);
1755 }
1756 
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1759 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1760 {
1761   PetscFunctionBegin;
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatSolve_MPIAIJ"
1767 /*
1768   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1769 */
1770 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1771 {
1772   namespace graph_dist = boost::graph::distributed;
1773 
1774   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1775   lgraph_type*   lgraph_p;
1776   PetscContainer c;
1777   PetscErrorCode ierr;
1778 
1779   PetscFunctionBegin;
1780   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1781   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1782   ierr = VecCopy(b, x); CHKERRQ(ierr);
1783 
1784   PetscScalar* array_x;
1785   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1786   PetscInt sx;
1787   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1788 
1789   PetscScalar* array_b;
1790   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1791   PetscInt sb;
1792   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1793 
1794   lgraph_type&   level_graph = *lgraph_p;
1795   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1796 
1797   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1798   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1799                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1800 
1801   typedef boost::iterator_property_map<array_ref_type::iterator,
1802                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1803   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1804                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1805 
1806   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1807 
1808   PetscFunctionReturn(0);
1809 }
1810 #endif
1811 
1812 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1813   PetscInt       nzlocal,nsends,nrecvs;
1814   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
1815   PetscScalar    *sbuf_a,**rbuf_a;
1816   PetscErrorCode (*MatDestroy)(Mat);
1817 } Mat_Redundant;
1818 
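/*
   Mat_Redundant caches the communication pattern and message buffers from the first
   MatGetRedundantMatrix() call so that a MAT_REUSE_MATRIX call can skip the setup
   phase. The struct is attached to the created matrix through a PetscContainer and
   the matrix's destroy routine is temporarily replaced, so the buffers are released
   together with the matrix. A simplified sketch of the attach step used further
   below (error checking omitted):

     PetscContainerCreate(PETSC_COMM_SELF,&container);
     PetscContainerSetPointer(container,redund);
     PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);
     PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);
     redund->MatDestroy = C->ops->destroy;
     C->ops->destroy    = MatDestroy_MatRedundant;
*/
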
1819 #undef __FUNCT__
1820 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1821 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1822 {
1823   PetscErrorCode       ierr;
1824   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1825   PetscInt             i;
1826 
1827   PetscFunctionBegin;
1828   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1829   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1830   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1831   for (i=0; i<redund->nrecvs; i++){
1832     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1833     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1834   }
1835   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1836   ierr = PetscFree(redund);CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatDestroy_MatRedundant"
1842 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1843 {
1844   PetscErrorCode  ierr;
1845   PetscContainer  container;
1846   Mat_Redundant   *redund=PETSC_NULL;
1847 
1848   PetscFunctionBegin;
1849   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1850   if (container) {
1851     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1852   } else {
1853     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1854   }
1855   A->ops->destroy = redund->MatDestroy;
1856   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1857   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1858   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1859   PetscFunctionReturn(0);
1860 }
1861 
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1864 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1865 {
1866   PetscMPIInt    rank,size;
1867   MPI_Comm       comm=mat->comm;
1868   PetscErrorCode ierr;
1869   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1870   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1871   PetscInt       *rowrange=mat->rmap.range;
1872   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1873   Mat            A=aij->A,B=aij->B,C=*matredundant;
1874   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1875   PetscScalar    *sbuf_a;
1876   PetscInt       nzlocal=a->nz+b->nz;
1877   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1878   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1879   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1880   PetscScalar    *vals,*aworkA,*aworkB;
1881   PetscMPIInt    tag1,tag2,tag3,imdex;
1882   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1883                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1884   MPI_Status     recv_status,*send_status;
1885   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1886   PetscInt       **rbuf_j=PETSC_NULL;
1887   PetscScalar    **rbuf_a=PETSC_NULL;
1888   Mat_Redundant  *redund=PETSC_NULL;
1889   PetscContainer container;
1890 
1891   PetscFunctionBegin;
1892   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1893   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1894 
1895   if (reuse == MAT_REUSE_MATRIX) {
1896     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1897     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1898     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1899     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1900     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1901     if (container) {
1902       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1903     } else {
1904       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1905     }
1906     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1907 
1908     nsends    = redund->nsends;
1909     nrecvs    = redund->nrecvs;
1910     send_rank = redund->send_rank; recv_rank = send_rank + size;
1911     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1912     sbuf_j    = redund->sbuf_j;
1913     sbuf_a    = redund->sbuf_a;
1914     rbuf_j    = redund->rbuf_j;
1915     rbuf_a    = redund->rbuf_a;
1916   }
1917 
1918   if (reuse == MAT_INITIAL_MATRIX){
1919     PetscMPIInt  subrank,subsize;
1920     PetscInt     nleftover,np_subcomm;
1921     /* get the destination processors' ids (send_rank), nsends and nrecvs */
1922     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1923     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1924     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
1925     recv_rank = send_rank + size;
1926     np_subcomm = size/nsubcomm;
1927     nleftover  = size - nsubcomm*np_subcomm;
1928     nsends = 0; nrecvs = 0;
1929     for (i=0; i<size; i++){ /* i=rank*/
1930       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1931         send_rank[nsends] = i; nsends++;
1932         recv_rank[nrecvs++] = i;
1933       }
1934     }
1935     if (rank >= size - nleftover){/* this proc is a leftover processor */
1936       i = size-nleftover-1;
1937       j = 0;
1938       while (j < nsubcomm - nleftover){
1939         send_rank[nsends++] = i;
1940         i--; j++;
1941       }
1942     }
1943 
1944     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1945       for (i=0; i<nleftover; i++){
1946         recv_rank[nrecvs++] = size-nleftover+i;
1947       }
1948     }
1949 
1950     /* allocate sbuf_j, sbuf_a */
1951     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1952     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1953     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1954   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1955 
1956   /* copy mat's local entries into the buffers */
1957   if (reuse == MAT_INITIAL_MATRIX){
1958     rownz_max = 0;
1959     rptr = sbuf_j;
1960     cols = sbuf_j + rend-rstart + 1;
1961     vals = sbuf_a;
1962     rptr[0] = 0;
1963     for (i=0; i<rend-rstart; i++){
1964       row = i + rstart;
1965       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1966       ncols  = nzA + nzB;
1967       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1968       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1969       /* load the column indices for this row into cols */
1970       lwrite = 0;
1971       for (l=0; l<nzB; l++) {
1972         if ((ctmp = bmap[cworkB[l]]) < cstart){
1973           vals[lwrite]   = aworkB[l];
1974           cols[lwrite++] = ctmp;
1975         }
1976       }
1977       for (l=0; l<nzA; l++){
1978         vals[lwrite]   = aworkA[l];
1979         cols[lwrite++] = cstart + cworkA[l];
1980       }
1981       for (l=0; l<nzB; l++) {
1982         if ((ctmp = bmap[cworkB[l]]) >= cend){
1983           vals[lwrite]   = aworkB[l];
1984           cols[lwrite++] = ctmp;
1985         }
1986       }
1987       vals += ncols;
1988       cols += ncols;
1989       rptr[i+1] = rptr[i] + ncols;
1990       if (rownz_max < ncols) rownz_max = ncols;
1991     }
1992     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
1993   } else { /* only copy matrix values into sbuf_a */
1994     rptr = sbuf_j;
1995     vals = sbuf_a;
1996     rptr[0] = 0;
1997     for (i=0; i<rend-rstart; i++){
1998       row = i + rstart;
1999       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2000       ncols  = nzA + nzB;
2001       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2002       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2003       lwrite = 0;
2004       for (l=0; l<nzB; l++) {
2005         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2006       }
2007       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2008       for (l=0; l<nzB; l++) {
2009         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2010       }
2011       vals += ncols;
2012       rptr[i+1] = rptr[i] + ncols;
2013     }
2014   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2015 
2016   /* send nzlocal to others, and recv others' nzlocal */
2017   /*--------------------------------------------------*/
2018   if (reuse == MAT_INITIAL_MATRIX){
2019     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2020     s_waits2 = s_waits3 + nsends;
2021     s_waits1 = s_waits2 + nsends;
2022     r_waits1 = s_waits1 + nsends;
2023     r_waits2 = r_waits1 + nrecvs;
2024     r_waits3 = r_waits2 + nrecvs;
2025   } else {
2026     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2027     r_waits3 = s_waits3 + nsends;
2028   }
2029 
2030   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2031   if (reuse == MAT_INITIAL_MATRIX){
2032     /* get new tags to keep the communication clean */
2033     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2034     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2035     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2036     rbuf_nz = sbuf_nz + nsends;
2037 
2038     /* post receives of other's nzlocal */
2039     for (i=0; i<nrecvs; i++){
2040       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2041     }
2042     /* send nzlocal to others */
2043     for (i=0; i<nsends; i++){
2044       sbuf_nz[i] = nzlocal;
2045       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2046     }
2047     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2048     count = nrecvs;
2049     while (count) {
2050       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2051       recv_rank[imdex] = recv_status.MPI_SOURCE;
2052       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2053       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2054 
2055       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2056       rbuf_nz[imdex] += i + 2;
2057       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2058       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2059       count--;
2060     }
2061     /* wait on sends of nzlocal */
2062     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2063     /* send mat->i,j to others, and recv from others */
2064     /*------------------------------------------------*/
2065     for (i=0; i<nsends; i++){
2066       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2067       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2068     }
2069     /* wait on receives of mat->i,j */
2070     /*------------------------------*/
2071     count = nrecvs;
2072     while (count) {
2073       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2074       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2075       count--;
2076     }
2077     /* wait on sends of mat->i,j */
2078     /*---------------------------*/
2079     if (nsends) {
2080       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2081     }
2082   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2083 
2084   /* post receives, send and receive mat->a */
2085   /*----------------------------------------*/
2086   for (imdex=0; imdex<nrecvs; imdex++) {
2087     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2088   }
2089   for (i=0; i<nsends; i++){
2090     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2091   }
2092   count = nrecvs;
2093   while (count) {
2094     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2095     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2096     count--;
2097   }
2098   if (nsends) {
2099     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2100   }
2101 
2102   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2103 
2104   /* create redundant matrix */
2105   /*-------------------------*/
2106   if (reuse == MAT_INITIAL_MATRIX){
2107     /* compute rownz_max for preallocation */
2108     for (imdex=0; imdex<nrecvs; imdex++){
2109       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2110       rptr = rbuf_j[imdex];
2111       for (i=0; i<j; i++){
2112         ncols = rptr[i+1] - rptr[i];
2113         if (rownz_max < ncols) rownz_max = ncols;
2114       }
2115     }
2116 
2117     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2118     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2119     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2120     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2121     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2122   } else {
2123     C = *matredundant;
2124   }
2125 
2126   /* insert local matrix entries */
2127   rptr = sbuf_j;
2128   cols = sbuf_j + rend-rstart + 1;
2129   vals = sbuf_a;
2130   for (i=0; i<rend-rstart; i++){
2131     row   = i + rstart;
2132     ncols = rptr[i+1] - rptr[i];
2133     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2134     vals += ncols;
2135     cols += ncols;
2136   }
2137   /* insert received matrix entries */
2138   for (imdex=0; imdex<nrecvs; imdex++){
2139     rstart = rowrange[recv_rank[imdex]];
2140     rend   = rowrange[recv_rank[imdex]+1];
2141     rptr = rbuf_j[imdex];
2142     cols = rbuf_j[imdex] + rend-rstart + 1;
2143     vals = rbuf_a[imdex];
2144     for (i=0; i<rend-rstart; i++){
2145       row   = i + rstart;
2146       ncols = rptr[i+1] - rptr[i];
2147       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2148       vals += ncols;
2149       cols += ncols;
2150     }
2151   }
2152   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2153   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2154   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2155   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2156   if (reuse == MAT_INITIAL_MATRIX){
2157     PetscContainer container;
2158     *matredundant = C;
2159     /* create a supporting struct and attach it to C for reuse */
2160     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2161     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2162     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2163     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2164     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2165 
2166     redund->nzlocal = nzlocal;
2167     redund->nsends  = nsends;
2168     redund->nrecvs  = nrecvs;
2169     redund->send_rank = send_rank;
2170     redund->sbuf_nz = sbuf_nz;
2171     redund->sbuf_j  = sbuf_j;
2172     redund->sbuf_a  = sbuf_a;
2173     redund->rbuf_j  = rbuf_j;
2174     redund->rbuf_a  = rbuf_a;
2175 
2176     redund->MatDestroy = C->ops->destroy;
2177     C->ops->destroy    = MatDestroy_MatRedundant;
2178   }
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 /* -------------------------------------------------------------------*/
2183 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2184        MatGetRow_MPIAIJ,
2185        MatRestoreRow_MPIAIJ,
2186        MatMult_MPIAIJ,
2187 /* 4*/ MatMultAdd_MPIAIJ,
2188        MatMultTranspose_MPIAIJ,
2189        MatMultTransposeAdd_MPIAIJ,
2190 #ifdef PETSC_HAVE_PBGL
2191        MatSolve_MPIAIJ,
2192 #else
2193        0,
2194 #endif
2195        0,
2196        0,
2197 /*10*/ 0,
2198        0,
2199        0,
2200        MatRelax_MPIAIJ,
2201        MatTranspose_MPIAIJ,
2202 /*15*/ MatGetInfo_MPIAIJ,
2203        MatEqual_MPIAIJ,
2204        MatGetDiagonal_MPIAIJ,
2205        MatDiagonalScale_MPIAIJ,
2206        MatNorm_MPIAIJ,
2207 /*20*/ MatAssemblyBegin_MPIAIJ,
2208        MatAssemblyEnd_MPIAIJ,
2209        0,
2210        MatSetOption_MPIAIJ,
2211        MatZeroEntries_MPIAIJ,
2212 /*25*/ MatZeroRows_MPIAIJ,
2213        0,
2214 #ifdef PETSC_HAVE_PBGL
2215        MatLUFactorNumeric_MPIAIJ,
2216 #else
2217        0,
2218 #endif
2219        0,
2220        0,
2221 /*30*/ MatSetUpPreallocation_MPIAIJ,
2222 #ifdef PETSC_HAVE_PBGL
2223        MatILUFactorSymbolic_MPIAIJ,
2224 #else
2225        0,
2226 #endif
2227        0,
2228        0,
2229        0,
2230 /*35*/ MatDuplicate_MPIAIJ,
2231        0,
2232        0,
2233        0,
2234        0,
2235 /*40*/ MatAXPY_MPIAIJ,
2236        MatGetSubMatrices_MPIAIJ,
2237        MatIncreaseOverlap_MPIAIJ,
2238        MatGetValues_MPIAIJ,
2239        MatCopy_MPIAIJ,
2240 /*45*/ 0,
2241        MatScale_MPIAIJ,
2242        0,
2243        0,
2244        0,
2245 /*50*/ MatSetBlockSize_MPIAIJ,
2246        0,
2247        0,
2248        0,
2249        0,
2250 /*55*/ MatFDColoringCreate_MPIAIJ,
2251        0,
2252        MatSetUnfactored_MPIAIJ,
2253        MatPermute_MPIAIJ,
2254        0,
2255 /*60*/ MatGetSubMatrix_MPIAIJ,
2256        MatDestroy_MPIAIJ,
2257        MatView_MPIAIJ,
2258        0,
2259        0,
2260 /*65*/ 0,
2261        0,
2262        0,
2263        0,
2264        0,
2265 /*70*/ 0,
2266        0,
2267        MatSetColoring_MPIAIJ,
2268 #if defined(PETSC_HAVE_ADIC)
2269        MatSetValuesAdic_MPIAIJ,
2270 #else
2271        0,
2272 #endif
2273        MatSetValuesAdifor_MPIAIJ,
2274 /*75*/ 0,
2275        0,
2276        0,
2277        0,
2278        0,
2279 /*80*/ 0,
2280        0,
2281        0,
2282        0,
2283 /*84*/ MatLoad_MPIAIJ,
2284        0,
2285        0,
2286        0,
2287        0,
2288        0,
2289 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2290        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2291        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2292        MatPtAP_Basic,
2293        MatPtAPSymbolic_MPIAIJ,
2294 /*95*/ MatPtAPNumeric_MPIAIJ,
2295        0,
2296        0,
2297        0,
2298        0,
2299 /*100*/0,
2300        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2301        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2302        MatConjugate_MPIAIJ,
2303        0,
2304 /*105*/MatSetValuesRow_MPIAIJ,
2305        MatRealPart_MPIAIJ,
2306        MatImaginaryPart_MPIAIJ,
2307        0,
2308        0,
2309 /*110*/0,
2310        MatGetRedundantMatrix_MPIAIJ};
2311 
2312 /* ----------------------------------------------------------------------------------------*/
2313 
2314 EXTERN_C_BEGIN
2315 #undef __FUNCT__
2316 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2317 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2318 {
2319   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2320   PetscErrorCode ierr;
2321 
2322   PetscFunctionBegin;
2323   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2324   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2325   PetscFunctionReturn(0);
2326 }
2327 EXTERN_C_END
2328 
2329 EXTERN_C_BEGIN
2330 #undef __FUNCT__
2331 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2332 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2333 {
2334   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2335   PetscErrorCode ierr;
2336 
2337   PetscFunctionBegin;
2338   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2339   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2340   PetscFunctionReturn(0);
2341 }
2342 EXTERN_C_END
2343 
2344 #include "petscpc.h"
2345 EXTERN_C_BEGIN
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2348 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2349 {
2350   Mat_MPIAIJ     *b;
2351   PetscErrorCode ierr;
2352   PetscInt       i;
2353 
2354   PetscFunctionBegin;
2355   B->preallocated = PETSC_TRUE;
2356   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2357   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2358   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2359   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2360 
2361   B->rmap.bs = B->cmap.bs = 1;
2362   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2363   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2364   if (d_nnz) {
2365     for (i=0; i<B->rmap.n; i++) {
2366       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2367     }
2368   }
2369   if (o_nnz) {
2370     for (i=0; i<B->rmap.n; i++) {
2371       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2372     }
2373   }
2374   b = (Mat_MPIAIJ*)B->data;
2375 
2376   /* Explicitly create 2 MATSEQAIJ matrices. */
2377   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2378   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2379   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2380   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2381   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2382   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2383   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2384   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2385 
2386   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2387   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2388 
2389   PetscFunctionReturn(0);
2390 }
2391 EXTERN_C_END
2392 
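/*
   The preallocation routine above creates the two sequential blocks that make up an
   MPIAIJ matrix: b->A holds the "diagonal" block (the columns this process owns) and
   b->B holds the "off-diagonal" block (all remaining columns, stored with compressed
   column indices). An assumed illustrative layout, a 4x4 matrix on 2 processes with
   2 rows each:

         owned by rank 0   owned by rank 1
        [ a00 a01 | a02 a03 ]   rank 0: columns 0,1 go to b->A, columns 2,3 to b->B
        [ a10 a11 | a12 a13 ]
        ---------------------
        [ a20 a21 | a22 a23 ]   rank 1: columns 2,3 go to b->A, columns 0,1 to b->B
        [ a30 a31 | a32 a33 ]

   d_nz/d_nnz preallocate b->A and o_nz/o_nnz preallocate b->B.
*/
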
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2395 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2396 {
2397   Mat            mat;
2398   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2399   PetscErrorCode ierr;
2400 
2401   PetscFunctionBegin;
2402   *newmat       = 0;
2403   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2404   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2405   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2406   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2407   a    = (Mat_MPIAIJ*)mat->data;
2408 
2409   mat->factor       = matin->factor;
2410   mat->rmap.bs      = matin->rmap.bs;
2411   mat->assembled    = PETSC_TRUE;
2412   mat->insertmode   = NOT_SET_VALUES;
2413   mat->preallocated = PETSC_TRUE;
2414 
2415   a->size           = oldmat->size;
2416   a->rank           = oldmat->rank;
2417   a->donotstash     = oldmat->donotstash;
2418   a->roworiented    = oldmat->roworiented;
2419   a->rowindices     = 0;
2420   a->rowvalues      = 0;
2421   a->getrowactive   = PETSC_FALSE;
2422 
2423   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2424   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2425 
2426   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2427   if (oldmat->colmap) {
2428 #if defined (PETSC_USE_CTABLE)
2429     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2430 #else
2431     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2432     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2433     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2434 #endif
2435   } else a->colmap = 0;
2436   if (oldmat->garray) {
2437     PetscInt len;
2438     len  = oldmat->B->cmap.n;
2439     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2440     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2441     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2442   } else a->garray = 0;
2443 
2444   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2445   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2446   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2447   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2448   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2449   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2450   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2451   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2452   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2453   *newmat = mat;
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 #include "petscsys.h"
2458 
2459 #undef __FUNCT__
2460 #define __FUNCT__ "MatLoad_MPIAIJ"
2461 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2462 {
2463   Mat            A;
2464   PetscScalar    *vals,*svals;
2465   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2466   MPI_Status     status;
2467   PetscErrorCode ierr;
2468   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2469   PetscInt       i,nz,j,rstart,rend,mmax;
2470   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2471   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2472   PetscInt       cend,cstart,n,*rowners;
2473   int            fd;
2474 
2475   PetscFunctionBegin;
2476   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2477   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2478   if (!rank) {
2479     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2480     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2481     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2482   }
2483 
2484   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2485   M = header[1]; N = header[2];
2486   /* determine ownership of all rows */
2487   m    = M/size + ((M % size) > rank);
2488   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2489   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2490 
2491   /* First process needs enough room for process with most rows */
2492   if (!rank) {
2493     mmax       = rowners[1];
2494     for (i=2; i<=size; i++) {
2495       mmax = PetscMax(mmax,rowners[i]);
2496     }
2497   } else mmax = m;
2498 
2499   rowners[0] = 0;
2500   for (i=2; i<=size; i++) {
2501     rowners[i] += rowners[i-1];
2502   }
2503   rstart = rowners[rank];
2504   rend   = rowners[rank+1];
2505 
2506   /* distribute row lengths to all processors */
2507   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2508   if (!rank) {
2509     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2510     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2511     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2512     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2513     for (j=0; j<m; j++) {
2514       procsnz[0] += ourlens[j];
2515     }
2516     for (i=1; i<size; i++) {
2517       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2518       /* calculate the number of nonzeros on each processor */
2519       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2520         procsnz[i] += rowlengths[j];
2521       }
2522       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2523     }
2524     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2525   } else {
2526     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2527   }
2528 
2529   if (!rank) {
2530     /* determine max buffer needed and allocate it */
2531     maxnz = 0;
2532     for (i=0; i<size; i++) {
2533       maxnz = PetscMax(maxnz,procsnz[i]);
2534     }
2535     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2536 
2537     /* read in my part of the matrix column indices  */
2538     nz   = procsnz[0];
2539     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2540     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2541 
2542     /* read in everyone else's and ship them off */
2543     for (i=1; i<size; i++) {
2544       nz   = procsnz[i];
2545       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2546       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2547     }
2548     ierr = PetscFree(cols);CHKERRQ(ierr);
2549   } else {
2550     /* determine buffer space needed for message */
2551     nz = 0;
2552     for (i=0; i<m; i++) {
2553       nz += ourlens[i];
2554     }
2555     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2556 
2557     /* receive message of column indices */
2558     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2559     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2560     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2561   }
2562 
2563   /* determine column ownership if matrix is not square */
2564   if (N != M) {
2565     n      = N/size + ((N % size) > rank);
2566     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2567     cstart = cend - n;
2568   } else {
2569     cstart = rstart;
2570     cend   = rend;
2571     n      = cend - cstart;
2572   }
2573 
2574   /* loop over local rows, determining the number of off-diagonal entries */
2575   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2576   jj = 0;
2577   for (i=0; i<m; i++) {
2578     for (j=0; j<ourlens[i]; j++) {
2579       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2580       jj++;
2581     }
2582   }
2583 
2584   /* create our matrix */
2585   for (i=0; i<m; i++) {
2586     ourlens[i] -= offlens[i];
2587   }
2588   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2589   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2590   ierr = MatSetType(A,type);CHKERRQ(ierr);
2591   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2592 
2593   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2594   for (i=0; i<m; i++) {
2595     ourlens[i] += offlens[i];
2596   }
2597 
2598   if (!rank) {
2599     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2600 
2601     /* read in my part of the matrix numerical values  */
2602     nz   = procsnz[0];
2603     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2604 
2605     /* insert into matrix */
2606     jj      = rstart;
2607     smycols = mycols;
2608     svals   = vals;
2609     for (i=0; i<m; i++) {
2610       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2611       smycols += ourlens[i];
2612       svals   += ourlens[i];
2613       jj++;
2614     }
2615 
2616     /* read in other processors and ship out */
2617     for (i=1; i<size; i++) {
2618       nz   = procsnz[i];
2619       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2620       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2621     }
2622     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2623   } else {
2624     /* receive numeric values */
2625     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2626 
2627     /* receive message of values */
2628     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2629     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2630     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2631 
2632     /* insert into matrix */
2633     jj      = rstart;
2634     smycols = mycols;
2635     svals   = vals;
2636     for (i=0; i<m; i++) {
2637       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2638       smycols += ourlens[i];
2639       svals   += ourlens[i];
2640       jj++;
2641     }
2642   }
2643   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2644   ierr = PetscFree(vals);CHKERRQ(ierr);
2645   ierr = PetscFree(mycols);CHKERRQ(ierr);
2646   ierr = PetscFree(rowners);CHKERRQ(ierr);
2647 
2648   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2649   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650   *newmat = A;
2651   PetscFunctionReturn(0);
2652 }
2653 
2654 #undef __FUNCT__
2655 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2656 /*
2657     Not great since it makes two copies of the submatrix: first a SeqAIJ
2658   on each process, and then the end result by concatenating the local matrices.
2659   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2660 */
2661 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2662 {
2663   PetscErrorCode ierr;
2664   PetscMPIInt    rank,size;
2665   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2666   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2667   Mat            *local,M,Mreuse;
2668   PetscScalar    *vwork,*aa;
2669   MPI_Comm       comm = mat->comm;
2670   Mat_SeqAIJ     *aij;
2671 
2672 
2673   PetscFunctionBegin;
2674   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2675   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2676 
2677   if (call ==  MAT_REUSE_MATRIX) {
2678     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2679     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2680     local = &Mreuse;
2681     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2682   } else {
2683     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2684     Mreuse = *local;
2685     ierr   = PetscFree(local);CHKERRQ(ierr);
2686   }
2687 
2688   /*
2689       m - number of local rows
2690       n - number of columns (same on all processors)
2691       rstart - first row in new global matrix generated
2692   */
2693   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2694   if (call == MAT_INITIAL_MATRIX) {
2695     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2696     ii  = aij->i;
2697     jj  = aij->j;
2698 
2699     /*
2700         Determine the number of non-zeros in the diagonal and off-diagonal
2701         portions of the matrix in order to do correct preallocation
2702     */
2703 
2704     /* first get start and end of "diagonal" columns */
2705     if (csize == PETSC_DECIDE) {
2706       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2707       if (mglobal == n) { /* square matrix */
2708         nlocal = m;
2709       } else {
2710         nlocal = n/size + ((n % size) > rank);
2711       }
2712     } else {
2713       nlocal = csize;
2714     }
2715     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2716     rstart = rend - nlocal;
2717     if (rank == size - 1 && rend != n) {
2718       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2719     }
2720 
2721     /* next, compute all the lengths */
2722     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2723     olens = dlens + m;
2724     for (i=0; i<m; i++) {
2725       jend = ii[i+1] - ii[i];
2726       olen = 0;
2727       dlen = 0;
2728       for (j=0; j<jend; j++) {
2729         if (*jj < rstart || *jj >= rend) olen++;
2730         else dlen++;
2731         jj++;
2732       }
2733       olens[i] = olen;
2734       dlens[i] = dlen;
2735     }
2736     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2737     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2738     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2739     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2740     ierr = PetscFree(dlens);CHKERRQ(ierr);
2741   } else {
2742     PetscInt ml,nl;
2743 
2744     M = *newmat;
2745     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2746     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2747     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2748     /*
2749          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2750        rather than the slower MatSetValues().
2751     */
2752     M->was_assembled = PETSC_TRUE;
2753     M->assembled     = PETSC_FALSE;
2754   }
2755   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2756   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2757   ii  = aij->i;
2758   jj  = aij->j;
2759   aa  = aij->a;
2760   for (i=0; i<m; i++) {
2761     row   = rstart + i;
2762     nz    = ii[i+1] - ii[i];
2763     cwork = jj;     jj += nz;
2764     vwork = aa;     aa += nz;
2765     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2766   }
2767 
2768   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2769   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2770   *newmat = M;
2771 
2772   /* save submatrix used in processor for next request */
2773   if (call ==  MAT_INITIAL_MATRIX) {
2774     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2775     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2776   }
2777 
2778   PetscFunctionReturn(0);
2779 }
2780 
2781 EXTERN_C_BEGIN
2782 #undef __FUNCT__
2783 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2784 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2785 {
2786   PetscInt       m,cstart, cend,j,nnz,i,d;
2787   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2788   const PetscInt *JJ;
2789   PetscScalar    *values;
2790   PetscErrorCode ierr;
2791 
2792   PetscFunctionBegin;
2793   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
2794 
2795   B->rmap.bs = B->cmap.bs = 1;
2796   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2797   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2798   m      = B->rmap.n;
2799   cstart = B->cmap.rstart;
2800   cend   = B->cmap.rend;
2801   rstart = B->rmap.rstart;
2802 
2803   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2804   o_nnz = d_nnz + m;
2805 
2806   for (i=0; i<m; i++) {
2807     nnz     = Ii[i+1]- Ii[i];
2808     JJ      = J + Ii[i];
2809     nnz_max = PetscMax(nnz_max,nnz);
2810     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2811     for (j=0; j<nnz; j++) {
2812       if (*JJ >= cstart) break;
2813       JJ++;
2814     }
2815     d = 0;
2816     for (; j<nnz; j++) {
2817       if (*JJ++ >= cend) break;
2818       d++;
2819     }
2820     d_nnz[i] = d;
2821     o_nnz[i] = nnz - d;
2822   }
2823   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2824   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2825 
2826   if (v) values = (PetscScalar*)v;
2827   else {
2828     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2829     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2830   }
2831 
2832   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2833   for (i=0; i<m; i++) {
2834     ii   = i + rstart;
2835     nnz  = Ii[i+1]- Ii[i];
2836     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2837   }
2838   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2839   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2840   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2841 
2842   if (!v) {
2843     ierr = PetscFree(values);CHKERRQ(ierr);
2844   }
2845   PetscFunctionReturn(0);
2846 }
2847 EXTERN_C_END
2848 
2849 #undef __FUNCT__
2850 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2851 /*@
2852    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2853    (the default parallel PETSc format).
2854 
2855    Collective on MPI_Comm
2856 
2857    Input Parameters:
2858 +  B - the matrix
2859 .  i - the indices into j for the start of each local row (starts with zero)
2860 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2861 -  v - optional values in the matrix
2862 
2863    Level: developer
2864 
2865    Notes: this actually copies the values from i[], j[], and v[] to put them into PETSc's internal
2866      storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2867 
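   Example usage (a minimal sketch; the local CSR arrays shown are purely illustrative):
.vb
      PetscInt    i[] = {0,2,4};         /* two local rows, two nonzeros in each */
      PetscInt    j[] = {0,3,1,2};       /* global column indices, sorted within each row */
      PetscScalar v[] = {1.0,2.0,3.0,4.0};

      MatCreate(comm,&B);
      MatSetSizes(B,2,PETSC_DECIDE,PETSC_DETERMINE,4);
      MatSetType(B,MATMPIAIJ);
      MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
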
2868 .keywords: matrix, aij, compressed row, sparse, parallel
2869 
2870 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2871           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2872 @*/
2873 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2874 {
2875   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2876 
2877   PetscFunctionBegin;
2878   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2879   if (f) {
2880     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2881   }
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 #undef __FUNCT__
2886 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2887 /*@C
2888    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2889    (the default parallel PETSc format).  For good matrix assembly performance
2890    the user should preallocate the matrix storage by setting the parameters
2891    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2892    performance can be increased by more than a factor of 50.
2893 
2894    Collective on MPI_Comm
2895 
2896    Input Parameters:
2897 +  B - the matrix
2898 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2899            (same value is used for all local rows)
2900 .  d_nnz - array containing the number of nonzeros in the various rows of the
2901            DIAGONAL portion of the local submatrix (possibly different for each row)
2902            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2903            The size of this array is equal to the number of local rows, i.e 'm'.
2904            You must leave room for the diagonal entry even if it is zero.
2905 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2906            submatrix (same value is used for all local rows).
2907 -  o_nnz - array containing the number of nonzeros in the various rows of the
2908            OFF-DIAGONAL portion of the local submatrix (possibly different for
2909            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2910            structure. The size of this array is equal to the number
2911            of local rows, i.e 'm'.
2912 
2913    If the *_nnz parameter is given then the *_nz parameter is ignored
2914 
2915    The AIJ format (also called the Yale sparse matrix format or
2916    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2917    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2918 
2919    The parallel matrix is partitioned such that the first m0 rows belong to
2920    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2921    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2922 
2923    The DIAGONAL portion of the local submatrix of a processor can be defined
2924    as the submatrix obtained by extracting the part corresponding to the rows
2925    r1-r2 and columns r1-r2 of the global matrix, where r1 is the first row
2926    that belongs to the processor and r2 is the last. This is a square m x m
2927    matrix. The remaining m x (N-n) portion of the local submatrix constitutes
2928    the OFF-DIAGONAL portion.
2929 
2930    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2931 
2932    Example usage:
2933 
2934    Consider the following 8x8 matrix with 34 non-zero values, that is
2935    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2936    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2937    as follows:
2938 
2939 .vb
2940             1  2  0  |  0  3  0  |  0  4
2941     Proc0   0  5  6  |  7  0  0  |  8  0
2942             9  0 10  | 11  0  0  | 12  0
2943     -------------------------------------
2944            13  0 14  | 15 16 17  |  0  0
2945     Proc1   0 18  0  | 19 20 21  |  0  0
2946             0  0  0  | 22 23  0  | 24  0
2947     -------------------------------------
2948     Proc2  25 26 27  |  0  0 28  | 29  0
2949            30  0  0  | 31 32 33  |  0 34
2950 .ve
2951 
2952    This can be represented as a collection of submatrices as:
2953 
2954 .vb
2955       A B C
2956       D E F
2957       G H I
2958 .ve
2959 
2960    Where the submatrices A,B,C are owned by proc0, D,E,F are
2961    owned by proc1, G,H,I are owned by proc2.
2962 
2963    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2964    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2965    The 'M','N' parameters are 8,8, and have the same values on all procs.
2966 
2967    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2968    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2969    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2970    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2971    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2972    matrix and [DF] as another SeqAIJ matrix.
2973 
2974    When d_nz, o_nz parameters are specified, d_nz storage elements are
2975    allocated for every row of the local diagonal submatrix, and o_nz
2976    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2977    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2978    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2979    In this case, the values of d_nz,o_nz are:
2980 .vb
2981      proc0 : dnz = 2, o_nz = 2
2982      proc1 : dnz = 3, o_nz = 2
2983      proc2 : dnz = 1, o_nz = 4
2984 .ve
2985    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2986    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2987    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2988    34 values.
2989 
2990    When d_nnz, o_nnz parameters are specified, the storage is specified
2991    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2992    In the above case the values for d_nnz,o_nnz are:
2993 .vb
2994      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2995      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2996      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2997 .ve
2998    Here the space allocated is the sum of all the above values, i.e. 34, and
2999    hence the preallocation is exact.
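
   For instance, proc0 above could preallocate with (a minimal sketch using the arrays
   from this example; error checking omitted):
.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0's rows in the example above */
      MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve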
3000 
3001    Level: intermediate
3002 
3003 .keywords: matrix, aij, compressed row, sparse, parallel
3004 
3005 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3006           MPIAIJ
3007 @*/
3008 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3009 {
3010   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3011 
3012   PetscFunctionBegin;
3013   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3014   if (f) {
3015     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3016   }
3017   PetscFunctionReturn(0);
3018 }
3019 
3020 #undef __FUNCT__
3021 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3022 /*@C
3023      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
3024          in standard CSR format.
3025 
3026    Collective on MPI_Comm
3027 
3028    Input Parameters:
3029 +  comm - MPI communicator
3030 .  m - number of local rows (Cannot be PETSC_DECIDE)
3031 .  n - This value should be the same as the local size used in creating the
3032        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3033        calculated if N is given). For square matrices n is almost always m.
3034 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3035 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3036 .   i - row indices
3037 .   j - column indices
3038 -   a - matrix values
3039 
3040    Output Parameter:
3041 .   mat - the matrix
3042 
3043    Level: intermediate
3044 
3045    Notes:
3046        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3047      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3048      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3049 
3050        The i and j indices are 0 based
3051 
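   Example usage (a minimal sketch; the local CSR arrays are illustrative only):
.vb
      PetscInt    i[] = {0,1,3};         /* two local rows */
      PetscInt    j[] = {0,0,2};         /* 0-based global column indices */
      PetscScalar a[] = {4.0,-1.0,1.0};
      Mat         A;

      MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);
.ve
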
3052 .keywords: matrix, aij, compressed row, sparse, parallel
3053 
3054 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3055           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3056 @*/
3057 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3058 {
3059   PetscErrorCode ierr;
3060 
3061   PetscFunctionBegin;
3062   if (i[0]) {
3063     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3064   }
3065   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3066   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3067   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3068   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3069   PetscFunctionReturn(0);
3070 }
3071 
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatCreateMPIAIJ"
3074 /*@C
3075    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3076    (the default parallel PETSc format).  For good matrix assembly performance
3077    the user should preallocate the matrix storage by setting the parameters
3078    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3079    performance can be increased by more than a factor of 50.
3080 
3081    Collective on MPI_Comm
3082 
3083    Input Parameters:
3084 +  comm - MPI communicator
3085 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3086            This value should be the same as the local size used in creating the
3087            y vector for the matrix-vector product y = Ax.
3088 .  n - This value should be the same as the local size used in creating the
3089        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3090        calculated if N is given). For square matrices n is almost always m.
3091 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3092 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3093 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3094            (same value is used for all local rows)
3095 .  d_nnz - array containing the number of nonzeros in the various rows of the
3096            DIAGONAL portion of the local submatrix (possibly different for each row)
3097            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3098            The size of this array is equal to the number of local rows, i.e 'm'.
3099            You must leave room for the diagonal entry even if it is zero.
3100 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3101            submatrix (same value is used for all local rows).
3102 -  o_nnz - array containing the number of nonzeros in the various rows of the
3103            OFF-DIAGONAL portion of the local submatrix (possibly different for
3104            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3105            structure. The size of this array is equal to the number
3106            of local rows, i.e 'm'.
3107 
3108    Output Parameter:
3109 .  A - the matrix
3110 
3111    Notes:
3112    If the *_nnz parameter is given then the *_nz parameter is ignored
3113 
3114    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3115    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3116    storage requirements for this matrix.
3117 
3118    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3119    processor then it must be used on all processors that share the object for
3120    that argument.
3121 
3122    The user MUST specify either the local or global matrix dimensions
3123    (possibly both).
3124 
3125    The parallel matrix is partitioned across processors such that the
3126    first m0 rows belong to process 0, the next m1 rows belong to
3127    process 1, the next m2 rows belong to process 2, etc., where
3128    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
3129    values corresponding to an [m x N] submatrix.
3130 
3131    The columns are logically partitioned with the n0 columns belonging
3132    to the 0th partition, the next n1 columns belonging to the next
3133    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
3134 
3135    The DIAGONAL portion of the local submatrix on any given processor
3136    is the submatrix corresponding to the rows and columns m,n owned by
3137    that processor, i.e. the diagonal submatrix on process 0 is [m0 x n0],
3138    on process 1 it is [m1 x n1], etc. The remaining [m x (N-n)] portion
3139    of the local submatrix constitutes the OFF-DIAGONAL portion. The
3140    example below illustrates this concept.
3142 
3143    For a square global matrix we define each processor's diagonal portion
3144    to be its local rows and the corresponding columns (a square submatrix);
3145    each processor's off-diagonal portion encompasses the remainder of the
3146    local matrix (a rectangular submatrix).
3147 
3148    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3149 
3150    When calling this routine with a single process communicator, a matrix of
3151    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3152    type of communicator, use the construction mechanism:
3153      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3154 
3155    By default, this format uses inodes (identical nodes) when possible.
3156    We search for consecutive rows with the same nonzero structure, thereby
3157    reusing matrix information to achieve increased efficiency.
3158 
3159    Options Database Keys:
3160 +  -mat_no_inode  - Do not use inodes
3161 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3162 -  -mat_aij_oneindex - Internally use indexing starting at 1
3163         rather than 0.  Note that when calling MatSetValues(),
3164         the user still MUST index entries starting at 0!
3165 
3166 
3167    Example usage:
3168 
3169    Consider the following 8x8 matrix with 34 non-zero values, that is
3170    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3171    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3172    as follows:
3173 
3174 .vb
3175             1  2  0  |  0  3  0  |  0  4
3176     Proc0   0  5  6  |  7  0  0  |  8  0
3177             9  0 10  | 11  0  0  | 12  0
3178     -------------------------------------
3179            13  0 14  | 15 16 17  |  0  0
3180     Proc1   0 18  0  | 19 20 21  |  0  0
3181             0  0  0  | 22 23  0  | 24  0
3182     -------------------------------------
3183     Proc2  25 26 27  |  0  0 28  | 29  0
3184            30  0  0  | 31 32 33  |  0 34
3185 .ve
3186 
3187    This can be represented as a collection of submatrices as:
3188 
3189 .vb
3190       A B C
3191       D E F
3192       G H I
3193 .ve
3194 
3195    Where the submatrices A,B,C are owned by proc0, D,E,F are
3196    owned by proc1, G,H,I are owned by proc2.
3197 
3198    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3199    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3200    The 'M','N' parameters are 8,8, and have the same values on all procs.
3201 
3202    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3203    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3204    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3205    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3206    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3207    matrix and [DF] as another SeqAIJ matrix.
3208 
3209    When d_nz, o_nz parameters are specified, d_nz storage elements are
3210    allocated for every row of the local diagonal submatrix, and o_nz
3211    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3212    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3213    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3214    In this case, the values of d_nz,o_nz are:
3215 .vb
3216      proc0 : dnz = 2, o_nz = 2
3217      proc1 : dnz = 3, o_nz = 2
3218      proc2 : dnz = 1, o_nz = 4
3219 .ve
3220    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3221    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3222    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3223    34 values.
3224 
3225    When d_nnz, o_nnz parameters are specified, the storage is specified
3226    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3227    In the above case the values for d_nnz,o_nnz are:
3228 .vb
3229      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3230      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3231      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3232 .ve
3233    Here the space allocated is the sum of all the above values, i.e. 34, and
3234    hence the preallocation is exact.
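
   With these counts, proc0's call would look like (a minimal sketch; error checking omitted):
.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      Mat      A;
      MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve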
3235 
3236    Level: intermediate
3237 
3238 .keywords: matrix, aij, compressed row, sparse, parallel
3239 
3240 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3241           MPIAIJ, MatCreateMPIAIJWithArrays()
3242 @*/
3243 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3244 {
3245   PetscErrorCode ierr;
3246   PetscMPIInt    size;
3247 
3248   PetscFunctionBegin;
3249   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3250   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3251   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3252   if (size > 1) {
3253     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3254     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3255   } else {
3256     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3257     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3258   }
3259   PetscFunctionReturn(0);
3260 }
3261 
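/*
   MatMPIAIJGetSeqAIJ - gives access to the diagonal block Ad, the off-diagonal block Ao, and the
   garray mapping from Ao's local column numbers to global column numbers of an MPIAIJ matrix.
   A minimal usage sketch (variable names are illustrative):

      Mat      Ad,Ao;
      PetscInt *garray;
      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);
*/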
3262 #undef __FUNCT__
3263 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3264 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3265 {
3266   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3267 
3268   PetscFunctionBegin;
3269   *Ad     = a->A;
3270   *Ao     = a->B;
3271   *colmap = a->garray;
3272   PetscFunctionReturn(0);
3273 }
3274 
3275 #undef __FUNCT__
3276 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3277 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3278 {
3279   PetscErrorCode ierr;
3280   PetscInt       i;
3281   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3282 
3283   PetscFunctionBegin;
3284   if (coloring->ctype == IS_COLORING_GLOBAL) {
3285     ISColoringValue *allcolors,*colors;
3286     ISColoring      ocoloring;
3287 
3288     /* set coloring for diagonal portion */
3289     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3290 
3291     /* set coloring for off-diagonal portion */
3292     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3293     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3294     for (i=0; i<a->B->cmap.n; i++) {
3295       colors[i] = allcolors[a->garray[i]];
3296     }
3297     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3298     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3299     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3300     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3301   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3302     ISColoringValue *colors;
3303     PetscInt        *larray;
3304     ISColoring      ocoloring;
3305 
3306     /* set coloring for diagonal portion */
3307     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3308     for (i=0; i<a->A->cmap.n; i++) {
3309       larray[i] = i + A->cmap.rstart;
3310     }
3311     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3312     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3313     for (i=0; i<a->A->cmap.n; i++) {
3314       colors[i] = coloring->colors[larray[i]];
3315     }
3316     ierr = PetscFree(larray);CHKERRQ(ierr);
3317     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3318     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3319     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3320 
3321     /* set coloring for off-diagonal portion */
3322     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3323     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3324     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3325     for (i=0; i<a->B->cmap.n; i++) {
3326       colors[i] = coloring->colors[larray[i]];
3327     }
3328     ierr = PetscFree(larray);CHKERRQ(ierr);
3329     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3330     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3331     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3332   } else {
3333     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3334   }
3335 
3336   PetscFunctionReturn(0);
3337 }
3338 
3339 #if defined(PETSC_HAVE_ADIC)
3340 #undef __FUNCT__
3341 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3342 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3343 {
3344   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3345   PetscErrorCode ierr;
3346 
3347   PetscFunctionBegin;
3348   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3349   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3350   PetscFunctionReturn(0);
3351 }
3352 #endif
3353 
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3356 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3357 {
3358   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3359   PetscErrorCode ierr;
3360 
3361   PetscFunctionBegin;
3362   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3363   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3364   PetscFunctionReturn(0);
3365 }
3366 
3367 #undef __FUNCT__
3368 #define __FUNCT__ "MatMerge"
3369 /*@C
3370       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
3371                  matrices from each processor
3372 
3373     Collective on MPI_Comm
3374 
3375    Input Parameters:
3376 +    comm - the communicator the parallel matrix will live on
3377 .    inmat - the input sequential matrix (one per process)
3378 .    n - number of local columns (or PETSC_DECIDE)
3379 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3380 
3381    Output Parameter:
3382 .    outmat - the parallel matrix generated
3383 
3384     Level: advanced
3385 
3386    Notes: The number of columns of the matrix MUST be the same on EACH processor.
3387      The input matrix inmat is destroyed by this routine once its values have been copied into outmat.
3388 
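   Typical usage (a sketch; assumes each process holds a SeqAIJ matrix locmat with the same
   number of columns):
.vb
      Mat locmat;          /* a SeqAIJ matrix assembled independently on each process */
      Mat pmat;
      MatMerge(PETSC_COMM_WORLD,locmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&pmat);
      /* locmat has been destroyed inside MatMerge() */
.ve
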
3388 @*/
3389 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3390 {
3391   PetscErrorCode ierr;
3392   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3393   PetscInt       *indx;
3394   PetscScalar    *values;
3395 
3396   PetscFunctionBegin;
3397   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3398   if (scall == MAT_INITIAL_MATRIX){
3399     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3400     if (n == PETSC_DECIDE){
3401       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3402     }
3403     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3404     rstart -= m;
3405 
3406     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3407     for (i=0;i<m;i++) {
3408       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3409       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3410       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3411     }
3412     /* This routine will ONLY return MPIAIJ type matrix */
3413     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3414     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3415     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3416     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3417     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3418 
3419   } else if (scall == MAT_REUSE_MATRIX){
3420     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3421   } else {
3422     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3423   }
3424 
3425   for (i=0;i<m;i++) {
3426     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3427     Ii    = i + rstart;
3428     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3429     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3430   }
3431   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3432   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3433   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3434 
3435   PetscFunctionReturn(0);
3436 }
3437 
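/*
   MatFileSplit - writes each process's local rows of A to its own binary viewer file named
   "<outfile>.<rank>", as a sequential AIJ matrix; a utility mainly intended for debugging.
*/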
3438 #undef __FUNCT__
3439 #define __FUNCT__ "MatFileSplit"
3440 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3441 {
3442   PetscErrorCode    ierr;
3443   PetscMPIInt       rank;
3444   PetscInt          m,N,i,rstart,nnz;
3445   size_t            len;
3446   const PetscInt    *indx;
3447   PetscViewer       out;
3448   char              *name;
3449   Mat               B;
3450   const PetscScalar *values;
3451 
3452   PetscFunctionBegin;
3453   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3454   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3455   /* Should this be the type of the diagonal block of A? */
3456   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3457   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3458   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3459   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3460   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3461   for (i=0;i<m;i++) {
3462     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3463     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3464     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3465   }
3466   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3467   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3468 
3469   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3470   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3471   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3472   sprintf(name,"%s.%d",outfile,rank);
3473   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3474   ierr = PetscFree(name);CHKERRQ(ierr);
3475   ierr = MatView(B,out);CHKERRQ(ierr);
3476   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3477   ierr = MatDestroy(B);CHKERRQ(ierr);
3478   PetscFunctionReturn(0);
3479 }
3480 
3481 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3482 #undef __FUNCT__
3483 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3484 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3485 {
3486   PetscErrorCode       ierr;
3487   Mat_Merge_SeqsToMPI  *merge;
3488   PetscContainer       container;
3489 
3490   PetscFunctionBegin;
3491   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3492   if (container) {
3493     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3494     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3495     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3496     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3497     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3498     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3499     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3500     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3501     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3502     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3503     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3504     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3505 
3506     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3507     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3508     ierr = PetscFree(merge);CHKERRQ(ierr); /* free merge only if it was actually attached to A */
3509   }
3510 
3511   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3512   PetscFunctionReturn(0);
3513 }
3514 
3515 #include "src/mat/utils/freespace.h"
3516 #include "petscbt.h"
3517 static PetscEvent logkey_seqstompinum = 0;
3518 #undef __FUNCT__
3519 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3520 /*@C
3521       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
3522                  matrices from each processor
3523 
3524     Collective on MPI_Comm
3525 
3526    Input Parameters:
3527 +    comm - the communicators the parallel matrix will live on
3528 .    seqmat - the input sequential matrices
3529 .    m - number of local rows (or PETSC_DECIDE)
3530 .    n - number of local columns (or PETSC_DECIDE)
3531 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3532 
3533    Output Parameter:
3534 .    mpimat - the parallel matrix generated
3535 
3536     Level: advanced
3537 
3538    Notes:
3539      The dimensions of the sequential matrix in each processor MUST be the same.
3540      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3541      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
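
   Typical usage (a sketch; seqmat is a SeqAIJ matrix of identical dimensions on every process):
.vb
      Mat mpimat;
      MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&mpimat);  /* symbolic + numeric phases */
      ...
      MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&mpimat);    /* numeric phase only */
.ve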
3542 @*/
3543 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3544 {
3545   PetscErrorCode       ierr;
3546   MPI_Comm             comm=mpimat->comm;
3547   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3548   PetscMPIInt          size,rank,taga,*len_s;
3549   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3550   PetscInt             proc,m;
3551   PetscInt             **buf_ri,**buf_rj;
3552   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3553   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3554   MPI_Request          *s_waits,*r_waits;
3555   MPI_Status           *status;
3556   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3557   Mat_Merge_SeqsToMPI  *merge;
3558   PetscContainer       container;
3559 
3560   PetscFunctionBegin;
3561   if (!logkey_seqstompinum) {
3562     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
3563   }
3564   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3565 
3566   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3567   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3568 
3569   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3570   if (container) {
3571     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3572   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix was not created by MatMerge_SeqsToMPISymbolic()");
3573   bi     = merge->bi;
3574   bj     = merge->bj;
3575   buf_ri = merge->buf_ri;
3576   buf_rj = merge->buf_rj;
3577 
3578   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3579   owners = merge->rowmap.range;
3580   len_s  = merge->len_s;
3581 
3582   /* send and recv matrix values */
3583   /*-----------------------------*/
3584   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3585   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3586 
3587   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3588   for (proc=0,k=0; proc<size; proc++){
3589     if (!len_s[proc]) continue;
3590     i = owners[proc];
3591     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3592     k++;
3593   }
3594 
3595   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3596   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3597   ierr = PetscFree(status);CHKERRQ(ierr);
3598 
3599   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3600   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3601 
3602   /* insert mat values of mpimat */
3603   /*----------------------------*/
3604   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3605   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3606   nextrow = buf_ri_k + merge->nrecv;
3607   nextai  = nextrow + merge->nrecv;
3608 
3609   for (k=0; k<merge->nrecv; k++){
3610     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3611     nrows = *(buf_ri_k[k]);
3612     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
3613     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
3614   }
3615 
3616   /* set values of ba */
3617   m = merge->rowmap.n;
3618   for (i=0; i<m; i++) {
3619     arow = owners[rank] + i;
3620     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3621     bnzi = bi[i+1] - bi[i];
3622     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3623 
3624     /* add local non-zero vals of this proc's seqmat into ba */
3625     anzi = ai[arow+1] - ai[arow];
3626     aj   = a->j + ai[arow];
3627     aa   = a->a + ai[arow];
3628     nextaj = 0;
3629     for (j=0; nextaj<anzi; j++){
3630       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3631         ba_i[j] += aa[nextaj++];
3632       }
3633     }
3634 
3635     /* add received vals into ba */
3636     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3637       /* i-th row */
3638       if (i == *nextrow[k]) {
3639         anzi = *(nextai[k]+1) - *nextai[k];
3640         aj   = buf_rj[k] + *(nextai[k]);
3641         aa   = abuf_r[k] + *(nextai[k]);
3642         nextaj = 0;
3643         for (j=0; nextaj<anzi; j++){
3644           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3645             ba_i[j] += aa[nextaj++];
3646           }
3647         }
3648         nextrow[k]++; nextai[k]++;
3649       }
3650     }
3651     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3652   }
3653   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3654   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3655 
3656   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3657   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3658   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3659   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3660   PetscFunctionReturn(0);
3661 }
3662 
3663 static PetscEvent logkey_seqstompisym = 0;
3664 #undef __FUNCT__
3665 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3666 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3667 {
3668   PetscErrorCode       ierr;
3669   Mat                  B_mpi;
3670   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3671   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3672   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3673   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3674   PetscInt             len,proc,*dnz,*onz;
3675   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3676   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3677   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3678   MPI_Status           *status;
3679   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3680   PetscBT              lnkbt;
3681   Mat_Merge_SeqsToMPI  *merge;
3682   PetscContainer       container;
3683 
3684   PetscFunctionBegin;
3685   if (!logkey_seqstompisym) {
3686     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3687   }
3688   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3689 
3690   /* make sure it is a PETSc comm */
3691   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3692   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3693   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3694 
3695   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3696   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3697 
3698   /* determine row ownership */
3699   /*---------------------------------------------------------*/
3700   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3701   merge->rowmap.n = m;
3702   merge->rowmap.N = M;
3703   merge->rowmap.bs = 1;
3704   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3705   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3706   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3707 
3708   m      = merge->rowmap.n;
3709   M      = merge->rowmap.N;
3710   owners = merge->rowmap.range;
3711 
3712   /* determine the number of messages to send, their lengths */
3713   /*---------------------------------------------------------*/
3714   len_s  = merge->len_s;
3715 
3716   len = 0;  /* length of buf_si[] */
3717   merge->nsend = 0;
3718   for (proc=0; proc<size; proc++){
3719     len_si[proc] = 0;
3720     if (proc == rank){
3721       len_s[proc] = 0;
3722     } else {
3723       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3724       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3725     }
3726     if (len_s[proc]) {
3727       merge->nsend++;
3728       nrows = 0;
3729       for (i=owners[proc]; i<owners[proc+1]; i++){
3730         if (ai[i+1] > ai[i]) nrows++;
3731       }
3732       len_si[proc] = 2*(nrows+1);
3733       len += len_si[proc];
3734     }
3735   }
3736 
3737   /* determine the number and length of messages to receive for ij-structure */
3738   /*-------------------------------------------------------------------------*/
3739   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3740   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3741 
3742   /* post the Irecv of j-structure */
3743   /*-------------------------------*/
3744   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3745   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3746 
3747   /* post the Isend of j-structure */
3748   /*--------------------------------*/
3749   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3750   sj_waits = si_waits + merge->nsend;
3751 
3752   for (proc=0, k=0; proc<size; proc++){
3753     if (!len_s[proc]) continue;
3754     i = owners[proc];
3755     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3756     k++;
3757   }
3758 
3759   /* receives and sends of j-structure are complete */
3760   /*------------------------------------------------*/
3761   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3762   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3763 
3764   /* send and recv i-structure */
3765   /*---------------------------*/
3766   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3767   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3768 
3769   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3770   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3771   for (proc=0,k=0; proc<size; proc++){
3772     if (!len_s[proc]) continue;
3773     /* form outgoing message for i-structure:
3774          buf_si[0]:                 nrows to be sent
3775                [1:nrows]:           row index (local to the destination process)
3776                [nrows+1:2*nrows+1]: i-structure index
3777     */
3778     /*-------------------------------------------*/
3779     nrows = len_si[proc]/2 - 1;
3780     buf_si_i    = buf_si + nrows+1;
3781     buf_si[0]   = nrows;
3782     buf_si_i[0] = 0;
3783     nrows = 0;
3784     for (i=owners[proc]; i<owners[proc+1]; i++){
3785       anzi = ai[i+1] - ai[i];
3786       if (anzi) {
3787         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3788         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3789         nrows++;
3790       }
3791     }
3792     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3793     k++;
3794     buf_si += len_si[proc];
3795   }
3796 
3797   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3798   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3799 
3800   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3801   for (i=0; i<merge->nrecv; i++){
3802     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3803   }
3804 
3805   ierr = PetscFree(len_si);CHKERRQ(ierr);
3806   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3807   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3808   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3809   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3810   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3811   ierr = PetscFree(status);CHKERRQ(ierr);
3812 
3813   /* compute a local seq matrix in each processor */
3814   /*----------------------------------------------*/
3815   /* allocate bi array and free space for accumulating nonzero column info */
3816   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3817   bi[0] = 0;
3818 
3819   /* create and initialize a linked list */
3820   nlnk = N+1;
3821   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3822 
3823   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3824   len  = ai[owners[rank+1]] - ai[owners[rank]];
3826   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3827   current_space = free_space;
3828 
3829   /* determine symbolic info for each local row */
3830   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3831   nextrow = buf_ri_k + merge->nrecv;
3832   nextai  = nextrow + merge->nrecv;
3833   for (k=0; k<merge->nrecv; k++){
3834     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3835     nrows = *buf_ri_k[k];
3836     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
3837     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
3838   }
3839 
3840   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3841   len = 0;
3842   for (i=0;i<m;i++) {
3843     bnzi   = 0;
3844     /* add local non-zero cols of this proc's seqmat into lnk */
3845     arow   = owners[rank] + i;
3846     anzi   = ai[arow+1] - ai[arow];
3847     aj     = a->j + ai[arow];
3848     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3849     bnzi += nlnk;
3850     /* add received col data into lnk */
3851     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3852       if (i == *nextrow[k]) { /* i-th row */
3853         anzi = *(nextai[k]+1) - *nextai[k];
3854         aj   = buf_rj[k] + *nextai[k];
3855         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3856         bnzi += nlnk;
3857         nextrow[k]++; nextai[k]++;
3858       }
3859     }
3860     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3861 
3862     /* if free space is not available, make more free space */
3863     if (current_space->local_remaining<bnzi) {
3864       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3865       nspacedouble++;
3866     }
3867     /* copy data into free space, then initialize lnk */
3868     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3869     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3870 
3871     current_space->array           += bnzi;
3872     current_space->local_used      += bnzi;
3873     current_space->local_remaining -= bnzi;
3874 
3875     bi[i+1] = bi[i] + bnzi;
3876   }
3877 
3878   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3879 
3880   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3881   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3882   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3883 
3884   /* create symbolic parallel matrix B_mpi */
3885   /*---------------------------------------*/
3886   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3887   if (n==PETSC_DECIDE) {
3888     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3889   } else {
3890     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3891   }
3892   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3893   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3894   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3895 
3896   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3897   B_mpi->assembled     = PETSC_FALSE;
3898   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3899   merge->bi            = bi;
3900   merge->bj            = bj;
3901   merge->buf_ri        = buf_ri;
3902   merge->buf_rj        = buf_rj;
3903   merge->coi           = PETSC_NULL;
3904   merge->coj           = PETSC_NULL;
3905   merge->owners_co     = PETSC_NULL;
3906 
3907   /* attach the supporting struct to B_mpi for reuse */
3908   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3909   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3910   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3911   *mpimat = B_mpi;
3912 
3913   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3914   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3915   PetscFunctionReturn(0);
3916 }
3917 
3918 static PetscEvent logkey_seqstompi = 0;
3919 #undef __FUNCT__
3920 #define __FUNCT__ "MatMerge_SeqsToMPI"
3921 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3922 {
3923   PetscErrorCode   ierr;
3924 
3925   PetscFunctionBegin;
3926   if (!logkey_seqstompi) {
3927     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3928   }
3929   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3930   if (scall == MAT_INITIAL_MATRIX){
3931     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3932   }
3933   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3934   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3935   PetscFunctionReturn(0);
3936 }
3937 static PetscEvent logkey_getlocalmat = 0;
3938 #undef __FUNCT__
3939 #define __FUNCT__ "MatGetLocalMat"
3940 /*@C
3941      MatGetLocalMat - Creates a SeqAIJ matrix from the local rows (diagonal and off-diagonal parts) of a parallel matrix
3942 
3943     Not Collective
3944 
3945    Input Parameters:
3946 +    A - the matrix
3947 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3948 
3949    Output Parameter:
3950 .    A_loc - the local sequential matrix generated
3951 
3952     Level: developer
3953 
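   Typical usage (a sketch; A is a parallel MPIAIJ matrix):
.vb
      Mat A_loc;
      MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);   /* gather the local rows into one SeqAIJ matrix */
      ...
      MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);     /* refresh the values after A has changed */
.ve
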
3954 @*/
3955 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3956 {
3957   PetscErrorCode  ierr;
3958   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3959   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3960   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3961   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3962   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3963   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3964 
3965   PetscFunctionBegin;
3966   if (!logkey_getlocalmat) {
3967     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3968   }
3969   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3970   if (scall == MAT_INITIAL_MATRIX){
3971     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3972     ci[0] = 0;
3973     for (i=0; i<am; i++){
3974       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3975     }
3976     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3977     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3978     k = 0;
3979     for (i=0; i<am; i++) {
3980       ncols_o = bi[i+1] - bi[i];
3981       ncols_d = ai[i+1] - ai[i];
3982       /* off-diagonal portion of A */
3983       for (jo=0; jo<ncols_o; jo++) {
3984         col = cmap[*bj];
3985         if (col >= cstart) break;
3986         cj[k]   = col; bj++;
3987         ca[k++] = *ba++;
3988       }
3989       /* diagonal portion of A */
3990       for (j=0; j<ncols_d; j++) {
3991         cj[k]   = cstart + *aj++;
3992         ca[k++] = *aa++;
3993       }
3994       /* off-diagonal portion of A */
3995       for (j=jo; j<ncols_o; j++) {
3996         cj[k]   = cmap[*bj++];
3997         ca[k++] = *ba++;
3998       }
3999     }
4000     /* put together the new matrix */
4001     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4002     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4003     /* Since these are PETSc arrays, change flags to free them as necessary. */
4004     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4005     mat->free_a  = PETSC_TRUE;
4006     mat->free_ij = PETSC_TRUE;
4007     mat->nonew   = 0;
4008   } else if (scall == MAT_REUSE_MATRIX){
4009     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4010     ci = mat->i; cj = mat->j; ca = mat->a;
4011     for (i=0; i<am; i++) {
4012       /* off-diagonal portion of A */
4013       ncols_o = bi[i+1] - bi[i];
4014       for (jo=0; jo<ncols_o; jo++) {
4015         col = cmap[*bj];
4016         if (col >= cstart) break;
4017         *ca++ = *ba++; bj++;
4018       }
4019       /* diagonal portion of A */
4020       ncols_d = ai[i+1] - ai[i];
4021       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4022       /* off-diagonal portion of A */
4023       for (j=jo; j<ncols_o; j++) {
4024         *ca++ = *ba++; bj++;
4025       }
4026     }
4027   } else {
4028     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4029   }
4030 
4031   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4032   PetscFunctionReturn(0);
4033 }
4034 
4035 static PetscEvent logkey_getlocalmatcondensed = 0;
4036 #undef __FUNCT__
4037 #define __FUNCT__ "MatGetLocalMatCondensed"
4038 /*@C
4039      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and the NON-ZERO columns of a parallel matrix
4040 
4041     Not Collective
4042 
4043    Input Parameters:
4044 +    A - the matrix
4045 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4046 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4047 
4048    Output Parameter:
4049 .    A_loc - the local sequential matrix generated
4050 
4051     Level: developer
4052 
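   Typical usage (a sketch; passing PETSC_NULL for row and col selects all local rows and all
   columns that contain nonzeros):
.vb
      Mat A_loc;
      MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve
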
4053 @*/
4054 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4055 {
4056   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4057   PetscErrorCode    ierr;
4058   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4059   IS                isrowa,iscola;
4060   Mat               *aloc;
4061 
4062   PetscFunctionBegin;
4063   if (!logkey_getlocalmatcondensed) {
4064     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
4065   }
4066   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4067   if (!row){
4068     start = A->rmap.rstart; end = A->rmap.rend;
4069     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4070   } else {
4071     isrowa = *row;
4072   }
4073   if (!col){
4074     start = A->cmap.rstart;
4075     cmap  = a->garray;
4076     nzA   = a->A->cmap.n;
4077     nzB   = a->B->cmap.n;
4078     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4079     ncols = 0;
4080     for (i=0; i<nzB; i++) {
4081       if (cmap[i] < start) idx[ncols++] = cmap[i];
4082       else break;
4083     }
4084     imark = i;
4085     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4086     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4087     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4088     ierr = PetscFree(idx);CHKERRQ(ierr);
4089   } else {
4090     iscola = *col;
4091   }
4092   if (scall != MAT_INITIAL_MATRIX){
4093     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4094     aloc[0] = *A_loc;
4095   }
4096   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4097   *A_loc = aloc[0];
4098   ierr = PetscFree(aloc);CHKERRQ(ierr);
4099   if (!row){
4100     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4101   }
4102   if (!col){
4103     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4104   }
4105   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4106   PetscFunctionReturn(0);
4107 }
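/*
   Example usage (a hedged sketch, not taken from this file): build the condensed local matrix of an
   assembled MPIAIJ matrix A once, then refresh its values after A changes numerically but keeps its
   nonzero pattern.

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
       ... use A_loc ...
     ierr = MatGetLocalMatCondensed(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/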
4108 
4109 static PetscEvent logkey_GetBrowsOfAcols = 0;
4110 #undef __FUNCT__
4111 #define __FUNCT__ "MatGetBrowsOfAcols"
4112 /*@C
4113     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
4114 
4115     Collective on Mat
4116 
4117    Input Parameters:
4118 +    A,B - the matrices in mpiaij format
4119 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4120 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4121 
4122    Output Parameters:
4123 +    rowb, colb - index sets of rows and columns of B to extract
4124 .    brstart - index of the first row of B_seq that comes from B's own local rows (the next B->rmap.n rows of B_seq)
4125 -    B_seq - the sequential matrix generated
4126 
4127     Level: developer
4128 
4129 @*/
4130 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4131 {
4132   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4133   PetscErrorCode    ierr;
4134   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4135   IS                isrowb,iscolb;
4136   Mat               *bseq;
4137 
4138   PetscFunctionBegin;
4139   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4140     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4141   }
4142   if (!logkey_GetBrowsOfAcols) {
4143     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
4144   }
4145   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4146 
4147   if (scall == MAT_INITIAL_MATRIX){
4148     start = A->cmap.rstart;
4149     cmap  = a->garray;
4150     nzA   = a->A->cmap.n;
4151     nzB   = a->B->cmap.n;
4152     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4153     ncols = 0;
4154     for (i=0; i<nzB; i++) {  /* row < local row index */
4155       if (cmap[i] < start) idx[ncols++] = cmap[i];
4156       else break;
4157     }
4158     imark = i;
4159     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4160     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4161     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4162     ierr = PetscFree(idx);CHKERRQ(ierr);
4163     *brstart = imark;
4164     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4165   } else {
4166     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4167     isrowb = *rowb; iscolb = *colb;
4168     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4169     bseq[0] = *B_seq;
4170   }
4171   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4172   *B_seq = bseq[0];
4173   ierr = PetscFree(bseq);CHKERRQ(ierr);
4174   if (!rowb){
4175     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4176   } else {
4177     *rowb = isrowb;
4178   }
4179   if (!colb){
4180     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4181   } else {
4182     *colb = iscolb;
4183   }
4184   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4185   PetscFunctionReturn(0);
4186 }
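/*
   Example usage (a hedged sketch, not taken from this file): fetch the rows of B that match the
   nonzero columns of the local part of A, keeping the index sets so the extraction can be repeated
   with MAT_REUSE_MATRIX when the numerical values change but the nonzero pattern does not.

     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
       ... use B_seq ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/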
4187 
4188 static PetscEvent logkey_GetBrowsOfAocols = 0;
4189 #undef __FUNCT__
4190 #define __FUNCT__ "MatGetBrowsOfAoCols"
4191 /*@C
4192     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
4193     of the OFF-DIAGONAL portion of the local part of A
4194 
4195     Collective on Mat
4196 
4197    Input Parameters:
4198 +    A,B - the matrices in mpiaij format
4199 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4200 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4201 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4202 
4203    Output Parameter:
4204 .    B_oth - the sequential matrix generated
4205 
4206     Level: developer
4207 
4208 @*/
4209 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4210 {
4211   VecScatter_MPI_General *gen_to,*gen_from;
4212   PetscErrorCode         ierr;
4213   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4214   Mat_SeqAIJ             *b_oth;
4215   VecScatter             ctx=a->Mvctx;
4216   MPI_Comm               comm=ctx->comm;
4217   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4218   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4219   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4220   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4221   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4222   MPI_Status             *sstatus,rstatus;
4223   PetscInt               *cols,sbs,rbs;
4224   PetscScalar            *vals;
4225 
4226   PetscFunctionBegin;
4227   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4228     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4229   }
4230   if (!logkey_GetBrowsOfAocols) {
4231     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
4232   }
4233   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4234   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4235 
4236   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4237   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4238   rvalues  = gen_from->values; /* holds the length of receiving row */
4239   svalues  = gen_to->values;   /* holds the length of sending row */
4240   nrecvs   = gen_from->n;
4241   nsends   = gen_to->n;
4242 
4243   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4244   srow     = gen_to->indices;   /* local row index to be sent */
4245   sstarts  = gen_to->starts;
4246   sprocs   = gen_to->procs;
4247   sstatus  = gen_to->sstatus;
4248   sbs      = gen_to->bs;
4249   rstarts  = gen_from->starts;
4250   rprocs   = gen_from->procs;
4251   rbs      = gen_from->bs;
4252 
4253   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4254   if (scall == MAT_INITIAL_MATRIX){
4255     /* i-array */
4256     /*---------*/
4257     /*  post receives */
4258     for (i=0; i<nrecvs; i++){
4259       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4260       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4261       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4262     }
4263 
4264     /* pack the outgoing message */
4265     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4266     rstartsj = sstartsj + nsends +1;
4267     sstartsj[0] = 0;  rstartsj[0] = 0;
4268     len = 0; /* total length of j or a array to be sent */
4269     k = 0;
4270     for (i=0; i<nsends; i++){
4271       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4272       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4273       for (j=0; j<nrows; j++) {
4274         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4275         for (l=0; l<sbs; l++){
4276           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4277           rowlen[j*sbs+l] = ncols;
4278           len += ncols;
4279           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4280         }
4281         k++;
4282       }
4283       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4284       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4285     }
4286     /* recvs and sends of i-array are completed */
4287     i = nrecvs;
4288     while (i--) {
4289       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4290     }
4291     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4292 
4293     /* allocate buffers for sending j and a arrays */
4294     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4295     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4296 
4297     /* create i-array of B_oth */
4298     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4299     b_othi[0] = 0;
4300     len = 0; /* total length of j or a array to be received */
4301     k = 0;
4302     for (i=0; i<nrecvs; i++){
4303       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4304       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4305       for (j=0; j<nrows; j++) {
4306         b_othi[k+1] = b_othi[k] + rowlen[j];
4307         len += rowlen[j]; k++;
4308       }
4309       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4310     }
4311 
4312     /* allocate space for j and a arrays of B_oth */
4313     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4314     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4315 
4316     /* j-array */
4317     /*---------*/
4318     /*  post receives of j-array */
4319     for (i=0; i<nrecvs; i++){
4320       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4321       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4322     }
4323 
4324     /* pack the outgoing message j-array */
4325     k = 0;
4326     for (i=0; i<nsends; i++){
4327       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4328       bufJ = bufj+sstartsj[i];
4329       for (j=0; j<nrows; j++) {
4330         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4331         for (ll=0; ll<sbs; ll++){
4332           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4333           for (l=0; l<ncols; l++){
4334             *bufJ++ = cols[l];
4335           }
4336           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4337         }
4338       }
4339       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4340     }
4341 
4342     /* recvs and sends of j-array are completed */
4343     i = nrecvs;
4344     while (i--) {
4345       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4346     }
4347     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4348   } else if (scall == MAT_REUSE_MATRIX){
4349     sstartsj = *startsj;
4350     rstartsj = sstartsj + nsends +1;
4351     bufa     = *bufa_ptr;
4352     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4353     b_otha   = b_oth->a;
4354   } else {
4355     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4356   }
4357 
4358   /* a-array */
4359   /*---------*/
4360   /*  post receives of a-array */
4361   for (i=0; i<nrecvs; i++){
4362     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4363     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4364   }
4365 
4366   /* pack the outgoing message a-array */
4367   k = 0;
4368   for (i=0; i<nsends; i++){
4369     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4370     bufA = bufa+sstartsj[i];
4371     for (j=0; j<nrows; j++) {
4372       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4373       for (ll=0; ll<sbs; ll++){
4374         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4375         for (l=0; l<ncols; l++){
4376           *bufA++ = vals[l];
4377         }
4378         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4379       }
4380     }
4381     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4382   }
4383   /* recvs and sends of a-array are completed */
4384   i = nrecvs;
4385   while (i--) {
4386     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4387   }
4388   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4389   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4390 
4391   if (scall == MAT_INITIAL_MATRIX){
4392     /* put together the new matrix */
4393     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4394 
4395     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4396     /* Since these are PETSc arrays, change flags to free them as necessary. */
4397     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4398     b_oth->free_a  = PETSC_TRUE;
4399     b_oth->free_ij = PETSC_TRUE;
4400     b_oth->nonew   = 0;
4401 
4402     ierr = PetscFree(bufj);CHKERRQ(ierr);
4403     if (!startsj || !bufa_ptr){
4404       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4405       ierr = PetscFree(bufa);CHKERRQ(ierr);
4406     } else {
4407       *startsj  = sstartsj;
4408       *bufa_ptr = bufa;
4409     }
4410   }
4411   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4412   PetscFunctionReturn(0);
4413 }
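/*
   Example usage (a hedged sketch, not taken from this file): the buffers returned through startsj and
   bufa_ptr let a second call with MAT_REUSE_MATRIX skip the i- and j-array exchange and re-communicate
   only the numerical values.

     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
       ... B's values change, nonzero pattern unchanged ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree(startsj);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/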
4414 
4415 #undef __FUNCT__
4416 #define __FUNCT__ "MatGetCommunicationStructs"
4417 /*@C
4418   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4419 
4420   Not Collective
4421 
4422   Input Parameters:
4423 . A - The matrix in mpiaij format
4424 
4425   Output Parameters:
4426 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4427 . colmap - A map from global column index to local index into lvec
4428 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4429 
4430   Level: developer
4431 
4432 @*/
4433 #if defined (PETSC_USE_CTABLE)
4434 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4435 #else
4436 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4437 #endif
4438 {
4439   Mat_MPIAIJ *a;
4440 
4441   PetscFunctionBegin;
4442   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4443   PetscValidPointer(lvec, 2)
4444   PetscValidPointer(colmap, 3)
4445   PetscValidPointer(multScatter, 4)
4446   a = (Mat_MPIAIJ *) A->data;
4447   if (lvec) *lvec = a->lvec;
4448   if (colmap) *colmap = a->colmap;
4449   if (multScatter) *multScatter = a->Mvctx;
4450   PetscFunctionReturn(0);
4451 }
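/*
   Example usage (a hedged sketch, not taken from this file): look at the communication structures an
   assembled MPIAIJ matrix A uses for MatMult().  The returned objects are references owned by A and
   must not be destroyed by the caller.

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/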
4452 
4453 EXTERN_C_BEGIN
4454 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4455 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4456 EXTERN_C_END
4457 
4458 /*MC
4459    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4460 
4461    Options Database Keys:
4462 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4463 
4464   Level: beginner
4465 
4466 .seealso: MatCreateMPIAIJ
4467 M*/
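/*
   Example of selecting this type at runtime (a hedged sketch, not taken from this file); M and N are
   assumed global sizes chosen by the caller:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);

   and run with the option  -mat_type mpiaij  so that MatSetFromOptions() picks up this type.
*/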
4468 
4469 EXTERN_C_BEGIN
4470 #undef __FUNCT__
4471 #define __FUNCT__ "MatCreate_MPIAIJ"
4472 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4473 {
4474   Mat_MPIAIJ     *b;
4475   PetscErrorCode ierr;
4476   PetscMPIInt    size;
4477 
4478   PetscFunctionBegin;
4479   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4480 
4481   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4482   B->data         = (void*)b;
4483   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4484   B->factor       = 0;
4485   B->rmap.bs      = 1;
4486   B->assembled    = PETSC_FALSE;
4487   B->mapping      = 0;
4488 
4489   B->insertmode      = NOT_SET_VALUES;
4490   b->size            = size;
4491   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4492 
4493   /* build cache for off array entries formed */
4494   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4495   b->donotstash  = PETSC_FALSE;
4496   b->colmap      = 0;
4497   b->garray      = 0;
4498   b->roworiented = PETSC_TRUE;
4499 
4500   /* stuff used for matrix vector multiply */
4501   b->lvec      = PETSC_NULL;
4502   b->Mvctx     = PETSC_NULL;
4503 
4504   /* stuff for MatGetRow() */
4505   b->rowindices   = 0;
4506   b->rowvalues    = 0;
4507   b->getrowactive = PETSC_FALSE;
4508 
4509 
4510   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4511                                      "MatStoreValues_MPIAIJ",
4512                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4513   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4514                                      "MatRetrieveValues_MPIAIJ",
4515                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4516   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4517 				     "MatGetDiagonalBlock_MPIAIJ",
4518                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4519   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4520 				     "MatIsTranspose_MPIAIJ",
4521 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4522   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4523 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4524 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4525   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4526 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4527 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4528   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4529 				     "MatDiagonalScaleLocal_MPIAIJ",
4530 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4531   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4532                                      "MatConvert_MPIAIJ_MPICSRPERM",
4533                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4534   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4535                                      "MatConvert_MPIAIJ_MPICRL",
4536                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4537   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4538   PetscFunctionReturn(0);
4539 }
4540 EXTERN_C_END
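/*
   Note (added commentary, not part of the original source): the PetscObjectComposeFunctionDynamic()
   calls above are what let the type-independent front ends, e.g. MatMPIAIJSetPreallocation() and
   MatStoreValues()/MatRetrieveValues(), find and call the MPIAIJ-specific implementations by name.
   A hedged usage sketch, assuming A is an MPIAIJ matrix assembled with new nonzero locations disallowed
   before the values are stored:

     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
       ... set values and assemble A ...
     ierr = MatStoreValues(A);CHKERRQ(ierr);
       ... change values, reassemble ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/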
4541 
4542 #undef __FUNCT__
4543 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4544 /*@C
4545      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
4546          and "off-diagonal" parts of the matrix in CSR format.
4547 
4548    Collective on MPI_Comm
4549 
4550    Input Parameters:
4551 +  comm - MPI communicator
4552 .  m - number of local rows (Cannot be PETSC_DECIDE)
4553 .  n - This value should be the same as the local size used in creating the
4554        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4555        it calculated if N is given). For square matrices n is almost always m.
4556 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4557 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4558 .   i - row indices for "diagonal" portion of matrix
4559 .   j - column indices
4560 .   a - matrix values
4561 .   oi - row indices for "off-diagonal" portion of matrix
4562 .   oj - column indices
4563 -   oa - matrix values
4564 
4565    Output Parameter:
4566 .   mat - the matrix
4567 
4568    Level: advanced
4569 
4570    Notes:
4571        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4572 
4573        The i and j indices are 0 based
4574 
4575        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4576 
4577 
4578 .keywords: matrix, aij, compressed row, sparse, parallel
4579 
4580 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4581           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4582 @*/
4583 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4584 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4585 {
4586   PetscErrorCode ierr;
4587   Mat_MPIAIJ     *maij;
4588 
4589   PetscFunctionBegin;
4590   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4591   if (i[0]) {
4592     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4593   }
4594   if (oi[0]) {
4595     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4596   }
4597   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4598   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4599   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4600   maij = (Mat_MPIAIJ*) (*mat)->data;
4601   maij->donotstash     = PETSC_TRUE;
4602   (*mat)->preallocated = PETSC_TRUE;
4603 
4604   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4605   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4606   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4607 
4608   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4609   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4610 
4611   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4612   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4613   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4614   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4615 
4616   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4617   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4618   PetscFunctionReturn(0);
4619 }
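/*
   Worked example (a hedged sketch, not taken from this file): on 2 processes, each owning 2 rows of a
   4x4 matrix, process 0 (rows 0-1, diagonal columns 0-1) could pass

     diagonal block:      i  = {0,2,3}   j  = {0,1,1}   a  = {1.0,2.0,3.0}
     off-diagonal block:  oi = {0,1,1}   oj = {2}       oa = {4.0}

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);

   Here j holds column indices local to the diagonal block, while oj is assumed to hold global column
   indices, since maij->B above is created with (*mat)->cmap.N columns; the arrays must remain valid for
   the life of the matrix because they are not copied.
*/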
4620 
4621 /*
4622     Special version for direct calls from Fortran
4623 */
4624 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4625 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4626 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4627 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4628 #endif
4629 
4630 /* Change these macros so they can be used in a void function */
4631 #undef CHKERRQ
4632 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4633 #undef SETERRQ2
4634 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4635 #undef SETERRQ
4636 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4637 
4638 EXTERN_C_BEGIN
4639 #undef __FUNCT__
4640 #define __FUNCT__ "matsetvaluesmpiaij_"
4641 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4642 {
4643   Mat            mat = *mmat;
4644   PetscInt       m = *mm, n = *mn;
4645   InsertMode     addv = *maddv;
4646   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4647   PetscScalar    value;
4648   PetscErrorCode ierr;
4649 
4650   MatPreallocated(mat);
4651   if (mat->insertmode == NOT_SET_VALUES) {
4652     mat->insertmode = addv;
4653   }
4654 #if defined(PETSC_USE_DEBUG)
4655   else if (mat->insertmode != addv) {
4656     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4657   }
4658 #endif
4659   {
4660   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4661   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4662   PetscTruth     roworiented = aij->roworiented;
4663 
4664   /* Some Variables required in the macro */
4665   Mat            A = aij->A;
4666   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4667   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4668   PetscScalar    *aa = a->a;
4669   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4670   Mat            B = aij->B;
4671   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4672   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4673   PetscScalar    *ba = b->a;
4674 
4675   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4676   PetscInt       nonew = a->nonew;
4677   PetscScalar    *ap1,*ap2;
4678 
4679   PetscFunctionBegin;
4680   for (i=0; i<m; i++) {
4681     if (im[i] < 0) continue;
4682 #if defined(PETSC_USE_DEBUG)
4683     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4684 #endif
4685     if (im[i] >= rstart && im[i] < rend) {
4686       row      = im[i] - rstart;
4687       lastcol1 = -1;
4688       rp1      = aj + ai[row];
4689       ap1      = aa + ai[row];
4690       rmax1    = aimax[row];
4691       nrow1    = ailen[row];
4692       low1     = 0;
4693       high1    = nrow1;
4694       lastcol2 = -1;
4695       rp2      = bj + bi[row];
4696       ap2      = ba + bi[row];
4697       rmax2    = bimax[row];
4698       nrow2    = bilen[row];
4699       low2     = 0;
4700       high2    = nrow2;
4701 
4702       for (j=0; j<n; j++) {
4703         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4704         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4705         if (in[j] >= cstart && in[j] < cend){
4706           col = in[j] - cstart;
4707           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4708         } else if (in[j] < 0) continue;
4709 #if defined(PETSC_USE_DEBUG)
4710         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4711 #endif
4712         else {
4713           if (mat->was_assembled) {
4714             if (!aij->colmap) {
4715               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4716             }
4717 #if defined (PETSC_USE_CTABLE)
4718             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4719 	    col--;
4720 #else
4721             col = aij->colmap[in[j]] - 1;
4722 #endif
4723             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4724               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4725               col =  in[j];
4726               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4727               B = aij->B;
4728               b = (Mat_SeqAIJ*)B->data;
4729               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4730               rp2      = bj + bi[row];
4731               ap2      = b->a + bi[row];
4732               rmax2    = bimax[row];
4733               nrow2    = bilen[row];
4734               low2     = 0;
4735               high2    = nrow2;
4736               bm       = aij->B->rmap.n;
4737               ba = b->a;
4738             }
4739           } else col = in[j];
4740           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4741         }
4742       }
4743     } else {
4744       if (!aij->donotstash) {
4745         if (roworiented) {
4746           if (ignorezeroentries && v[i*n] == 0.0) continue;
4747           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4748         } else {
4749           if (ignorezeroentries && v[i] == 0.0) continue;
4750           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4751         }
4752       }
4753     }
4754   }}
4755   PetscFunctionReturnVoid();
4756 }
4757 EXTERN_C_END
4758 
4759