xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4529a9f4da609cb2b178f9ce929ea0038401b7d2)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash-table cost; without it, it is not scalable (each
  processor has an order-N integer array, but it is fast to access).
*/
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
/* number of extra nonzero slots grabbed on each reallocation */
#define CHUNKSIZE   15
/*
   Inserts (or adds, per addv) a single (row,col,value) entry into the
   diagonal (A) block.  NOT a self-contained macro: it reads and updates the
   caller-scope search state rp1/ap1/nrow1/rmax1/low1/high1/lastcol1 plus
   A,a,am,aa,ai,aj,aimax,ailen,nonew,ignorezeroentries,N,t,ii,_i set up by
   MatSetValues_MPIAIJ().  Binary search narrows to a 5-wide window, then a
   linear scan finds the insertion point; a new nonzero shifts later entries
   up, possibly reallocating the row via MatSeqXAIJReallocateAIJ.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
      if (nonew == 1) goto a_noinsert; \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Overwrites every stored value of one locally owned row.  v is assumed to
   hold the row's nonzeros in ascending global-column order, so it splits as
   [off-diagonal entries left of the diagonal block | diagonal-block entries |
   off-diagonal entries right of the diagonal block].
   NOTE(review): the split point is found by comparing garray[] against the
   row-ownership start ('diag'), which presumes the row and column ranges
   coincide — hence the square-matrix restriction below.  v must contain
   exactly as many entries as the row stores; this is not checked.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;   /* convert global row to local row number */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  /* l = number of B-part entries with global column < diag */
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves an m-by-n block of values into v (row-major).  Only locally
   owned rows may be queried; columns outside the diagonal block are looked
   up through colmap, and entries not stored in B are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate global column to local via colmap */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* column not stored locally -> value is zero by sparsity */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are either in INSERTMODE or ADDMODE */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash into local blocks, assembles the
   diagonal block, synchronizes disassembly state across ranks, builds the
   scatter for off-processor vector entries on first assembly, and finally
   assembles the off-diagonal block in compressed-row mode.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive and apply stashed values from other processors */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use     = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: result is 0 iff any rank disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build lvec/Mvctx scatter machinery */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = A->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length*/
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for stuff going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* move the data into the send scatter */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal we use separate
502      code so that MatSetValues() is not called for each diagonal allocating
503      new memory, thus calling lots of mallocs and slowing things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   yy = A^T * xx.  B^T*xx produces contributions to off-processor entries of
   yy, which are sent back with a reverse ADD_VALUES scatter.  The statement
   order differs depending on whether the scatter context merges the forward
   and reverse communication (VecScatterGetMerged).
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
605 
606 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   Tests whether Bmat equals Amat^T (to tolerance tol).  First the cheap
   local test (is Bmat's diagonal block the transpose of Amat's?); only if
   that passes, and there is more than one process, are the off-diagonal
   parts compared via submatrix extraction.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  /* notme = all global indices NOT owned by this process; allocation is
     sized with N but the second fill loop runs to M.
     NOTE(review): this is only consistent when M == N (square matrix, which
     a transpose comparison with identical layouts presumes) — confirm. */
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  /* compare A(Me,Notme) against B(Notme,Me) */
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
649 EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   Releases everything owned by the parallel AIJ context: the stash, the two
   sequential blocks, the column map, garray, the ghost vector and scatter,
   cached row values, then the context itself; finally clears the type name
   and the composed type-specific function slots.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  /* PetscFree on a possibly-NULL colmap is safe */
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  /* detach the type name and the MPIAIJ-specific composed functions */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* )th processor needs space a largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869 {
870   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,iascii,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (iascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo    info;
885       PetscTruth inodes;
886 
887       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
888       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
890       if (!inodes) {
891         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
893       } else {
894         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
896       }
897       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
899       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
901       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
902       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
903       PetscFunctionReturn(0);
904     } else if (format == PETSC_VIEWER_ASCII_INFO) {
905       PetscInt   inodecount,inodelimit,*inodes;
906       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
907       if (inodes) {
908         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
909       } else {
910         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
911       }
912       PetscFunctionReturn(0);
913     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
914       PetscFunctionReturn(0);
915     }
916   } else if (isbinary) {
917     if (size == 1) {
918       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
919       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
920     } else {
921       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
922     }
923     PetscFunctionReturn(0);
924   } else if (isdraw) {
925     PetscDraw  draw;
926     PetscTruth isnull;
927     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
928     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
929   }
930 
931   if (size == 1) {
932     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
933     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
934   } else {
935     /* assemble the entire matrix onto first processor. */
936     Mat         A;
937     Mat_SeqAIJ  *Aloc;
938     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
939     PetscScalar *a;
940 
941     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
942     if (!rank) {
943       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
944     } else {
945       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
946     }
947     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
948     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
949     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
950     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
951 
952     /* copy over the A part */
953     Aloc = (Mat_SeqAIJ*)aij->A->data;
954     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
955     row = mat->rmap.rstart;
956     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
957     for (i=0; i<m; i++) {
958       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
959       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
960     }
961     aj = Aloc->j;
962     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
963 
964     /* copy over the B part */
965     Aloc = (Mat_SeqAIJ*)aij->B->data;
966     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
967     row  = mat->rmap.rstart;
968     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
969     ct   = cols;
970     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
971     for (i=0; i<m; i++) {
972       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
973       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
974     }
975     ierr = PetscFree(ct);CHKERRQ(ierr);
976     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     /*
979        Everyone has to call to draw the matrix since the graphics waits are
980        synchronized across all processors that share the PetscDraw object
981     */
982     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
983     if (!rank) {
984       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
985       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
986     }
987     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
988     ierr = MatDestroy(A);CHKERRQ(ierr);
989   }
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatView_MPIAIJ"
995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
996 {
997   PetscErrorCode ierr;
998   PetscTruth     iascii,isdraw,issocket,isbinary;
999 
1000   PetscFunctionBegin;
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1005   if (iascii || isdraw || isbinary || issocket) {
1006     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1007   } else {
1008     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1009   }
1010   PetscFunctionReturn(0);
1011 }
1012 
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
   MatRelax_MPIAIJ - SOR/relaxation for MPIAIJ matrices.

   Only the "local" sweep variants are supported.  Each outer iteration
   scatters the current solution xx into the ghost vector lvec, forms
   bb1 = bb - B*x (moving the off-process coupling to the right-hand side),
   and runs the corresponding local relaxation on the diagonal block A with
   lits inner iterations.  Truly coupled parallel SOR is not implemented.
*/
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;  /* work vector holding the modified right-hand side */

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    /* a zero initial guess lets the first sweep run straight on bb, no scatter needed */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* NOTE(review): PETSC_NULL is passed in the (PetscInt) lits slot of the
         relax kernel for the one-directional sweeps -- presumably meaning
         0/unused; confirm against the SeqAIJ relax implementation */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatPermute_MPIAIJ"
1088 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1089 {
1090   MPI_Comm       comm,pcomm;
1091   PetscInt       first,local_size,nrows,*rows;
1092   int            ntids;
1093   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1098   /* make a collective version of 'rowp' */
1099   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1100   if (pcomm==comm) {
1101     crowp = rowp;
1102   } else {
1103     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1104     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1105     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1106     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1107   }
1108   /* collect the global row permutation and invert it */
1109   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1110   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1111   if (pcomm!=comm) {
1112     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1113   }
1114   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1115   /* get the local target indices */
1116   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1117   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1119   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1120   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1121   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1122   /* the column permutation is so much easier;
1123      make a local version of 'colp' and invert it */
1124   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1125   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1126   if (ntids==1) {
1127     lcolp = colp;
1128   } else {
1129     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1130     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1131     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1132   }
1133   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1134   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1135   if (ntids>1) {
1136     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1137     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1138   }
1139   /* now we just get the submatrix */
1140   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1141   /* clean up */
1142   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1143   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1149 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1150 {
1151   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1152   Mat            A = mat->A,B = mat->B;
1153   PetscErrorCode ierr;
1154   PetscReal      isend[5],irecv[5];
1155 
1156   PetscFunctionBegin;
1157   info->block_size     = 1.0;
1158   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1159   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1160   isend[3] = info->memory;  isend[4] = info->mallocs;
1161   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1162   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1163   isend[3] += info->memory;  isend[4] += info->mallocs;
1164   if (flag == MAT_LOCAL) {
1165     info->nz_used      = isend[0];
1166     info->nz_allocated = isend[1];
1167     info->nz_unneeded  = isend[2];
1168     info->memory       = isend[3];
1169     info->mallocs      = isend[4];
1170   } else if (flag == MAT_GLOBAL_MAX) {
1171     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1172     info->nz_used      = irecv[0];
1173     info->nz_allocated = irecv[1];
1174     info->nz_unneeded  = irecv[2];
1175     info->memory       = irecv[3];
1176     info->mallocs      = irecv[4];
1177   } else if (flag == MAT_GLOBAL_SUM) {
1178     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1179     info->nz_used      = irecv[0];
1180     info->nz_allocated = irecv[1];
1181     info->nz_unneeded  = irecv[2];
1182     info->memory       = irecv[3];
1183     info->mallocs      = irecv[4];
1184   }
1185   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1186   info->fill_ratio_needed = 0;
1187   info->factor_mallocs    = 0;
1188   info->rows_global       = (double)matin->rmap.N;
1189   info->columns_global    = (double)matin->cmap.N;
1190   info->rows_local        = (double)matin->rmap.n;
1191   info->columns_local     = (double)matin->cmap.N;
1192 
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatSetOption_MPIAIJ"
1198 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1199 {
1200   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   switch (op) {
1205   case MAT_NO_NEW_NONZERO_LOCATIONS:
1206   case MAT_YES_NEW_NONZERO_LOCATIONS:
1207   case MAT_COLUMNS_UNSORTED:
1208   case MAT_COLUMNS_SORTED:
1209   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1210   case MAT_KEEP_ZEROED_ROWS:
1211   case MAT_NEW_NONZERO_LOCATION_ERR:
1212   case MAT_USE_INODES:
1213   case MAT_DO_NOT_USE_INODES:
1214   case MAT_IGNORE_ZERO_ENTRIES:
1215     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1216     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1217     break;
1218   case MAT_ROW_ORIENTED:
1219     a->roworiented = PETSC_TRUE;
1220     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1221     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1222     break;
1223   case MAT_ROWS_SORTED:
1224   case MAT_ROWS_UNSORTED:
1225   case MAT_YES_NEW_DIAGONALS:
1226     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1227     break;
1228   case MAT_COLUMN_ORIENTED:
1229     a->roworiented = PETSC_FALSE;
1230     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1231     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1232     break;
1233   case MAT_IGNORE_OFF_PROC_ENTRIES:
1234     a->donotstash = PETSC_TRUE;
1235     break;
1236   case MAT_NO_NEW_DIAGONALS:
1237     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1238   case MAT_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1240     break;
1241   case MAT_STRUCTURALLY_SYMMETRIC:
1242   case MAT_HERMITIAN:
1243   case MAT_SYMMETRY_ETERNAL:
1244     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1245     break;
1246   case MAT_NOT_SYMMETRIC:
1247   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1248   case MAT_NOT_HERMITIAN:
1249   case MAT_NOT_SYMMETRY_ETERNAL:
1250     break;
1251   default:
1252     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1253   }
1254   PetscFunctionReturn(0);
1255 }
1256 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns the column indices and/or values of one locally
   owned row, in global column numbering with columns in increasing order.

   The returned row is the merge of the matching rows of the diagonal (A)
   and off-diagonal (B) blocks: B entries whose global column precedes the
   diagonal block come first, then all A entries, then the remaining B
   entries.  Only one row may be active at a time; MatRestoreRow_MPIAIJ()
   clears the getrowactive flag.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt     max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds both values and indices; indices live after the values */
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* request from the blocks only what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;  /* compressed B column -> global column */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;  /* number of B entries that precede the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark was already determined by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* hand the work arrays back to the blocks */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1340 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1341 {
1342   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1343 
1344   PetscFunctionBegin;
1345   if (!aij->getrowactive) {
1346     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1347   }
1348   aij->getrowactive = PETSC_FALSE;
1349   PetscFunctionReturn(0);
1350 }
1351 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, one, or infinity norm of an
   MPIAIJ matrix from the nonzeros of the local diagonal (A) and
   off-diagonal (B) blocks, reducing across the communicator.
   The two norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap.rstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* uniprocessor case: defer entirely to the sequential implementation */
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squared magnitudes of all local entries, then a global sum */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* accumulate absolute column sums over all N global columns, sum across
         processes, then take the max; note the O(N) per-process storage */
      PetscReal *tmp,*tmp2;
      PetscInt    *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        /* the diagonal block stores local column indices; shift by cstart */
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        /* off-diagonal block columns map to global through garray */
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap.n; j++) {
        /* row sum = |entries| of local row j over both blocks */
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1428 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.

   A new matrix B with the reversed layout (A->cmap.n local rows,
   A->rmap.n local columns) is preallocated and filled by inserting each
   local row of A as a column of B via MatSetValues(); assembly routes the
   transposed off-diagonal entries to their owning processes.  If matout is
   NULL the result replaces A in place (square matrices only).
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
  PetscInt       cstart=A->cmap.rstart,ncol;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
  ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  /* one allocation: na+1 counts for d_nnz followed by bi[mb] slots of work space */
  ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  cols = d_nnz + na + 1; /* work space to be used by B part */
  ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ai[ma]; i++){
    d_nnz[aj[i]] ++;
    aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);

  /* copy over the A part: insert each local row of A as a column of B */
  array = Aloc->a;
  row = A->rmap.rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part, mapping compressed columns to global through garray first */
  array = Bloc->a;
  row = A->rmap.rstart;
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols += ncol;
  }
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place transpose: B's data replaces A's, B's header is discarded */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1491 
1492 #undef __FUNCT__
1493 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1494 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1495 {
1496   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1497   Mat            a = aij->A,b = aij->B;
1498   PetscErrorCode ierr;
1499   PetscInt       s1,s2,s3;
1500 
1501   PetscFunctionBegin;
1502   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1503   if (rr) {
1504     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1505     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1506     /* Overlap communication with computation. */
1507     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1508   }
1509   if (ll) {
1510     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1511     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1512     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1513   }
1514   /* scale  the diagonal block */
1515   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1516 
1517   if (rr) {
1518     /* Do a scatter end and then right scale the off-diagonal block */
1519     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1520     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1521   }
1522 
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1528 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1529 {
1530   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1531   PetscErrorCode ierr;
1532 
1533   PetscFunctionBegin;
1534   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1535   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1540 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1541 {
1542   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1543   PetscErrorCode ierr;
1544 
1545   PetscFunctionBegin;
1546   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatEqual_MPIAIJ"
1552 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1553 {
1554   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1555   Mat            a,b,c,d;
1556   PetscTruth     flg;
1557   PetscErrorCode ierr;
1558 
1559   PetscFunctionBegin;
1560   a = matA->A; b = matA->B;
1561   c = matB->A; d = matB->B;
1562 
1563   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1564   if (flg) {
1565     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1566   }
1567   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatCopy_MPIAIJ"
1573 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1574 {
1575   PetscErrorCode ierr;
1576   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1577   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1578 
1579   PetscFunctionBegin;
1580   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1581   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1582     /* because of the column compression in the off-processor part of the matrix a->B,
1583        the number of columns in a->B and b->B may be different, hence we cannot call
1584        the MatCopy() directly on the two parts. If need be, we can provide a more
1585        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1586        then copying the submatrices */
1587     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1588   } else {
1589     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1590     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1591   }
1592   PetscFunctionReturn(0);
1593 }
1594 
1595 #undef __FUNCT__
1596 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1597 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1598 {
1599   PetscErrorCode ierr;
1600 
1601   PetscFunctionBegin;
1602   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 #include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN:   both blocks of X and Y have identical nonzero layouts,
                           so the update is a raw BLAS axpy on the value arrays.
   SUBSET_NONZERO_PATTERN: X's nonzeros are a subset of Y's.  Diagonal blocks go
                           through MatAXPY_SeqAIJ(); for the off-diagonal blocks a
                           nonzero-to-nonzero index map (xtoy) is built and cached,
                           since the two blocks compress their column spaces
                           differently (different garray).
   otherwise:              fall back to the generic entry-by-entry MatAXPY_Basic().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks: y->a += alpha * x->a over all nz entries */
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    /* off-diagonal blocks, same layout assumption */
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* a cached xtoy map is only valid for the X block it was built against */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      /* keep xx->B alive while the cached map refers to it */
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    /* xtoy[i] is the slot in y->a holding X's i-th off-diagonal nonzero */
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1648 
1649 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatConjugate_MPIAIJ"
1653 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1654 {
1655 #if defined(PETSC_USE_COMPLEX)
1656   PetscErrorCode ierr;
1657   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1658 
1659   PetscFunctionBegin;
1660   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1661   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1662 #else
1663   PetscFunctionBegin;
1664 #endif
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 #undef __FUNCT__
1669 #define __FUNCT__ "MatRealPart_MPIAIJ"
1670 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1671 {
1672   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1673   PetscErrorCode ierr;
1674 
1675   PetscFunctionBegin;
1676   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1677   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1683 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1684 {
1685   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1686   PetscErrorCode ierr;
1687 
1688   PetscFunctionBegin;
1689   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1690   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 #ifdef PETSC_HAVE_PBGL
1695 
1696 #include <boost/parallel/mpi/bsp_process_group.hpp>
1697 #include <boost/graph/distributed/ilu_default_graph.hpp>
1698 #include <boost/graph/distributed/ilu_0_block.hpp>
1699 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1700 #include <boost/graph/distributed/petsc/interface.hpp>
1701 #include <boost/multi_array.hpp>
1702 #include <boost/parallel/distributed_property_map.hpp>
1703 
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1706 /*
1707   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1708 */
1709 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1710 {
1711   namespace petsc = boost::distributed::petsc;
1712 
1713   namespace graph_dist = boost::graph::distributed;
1714   using boost::graph::distributed::ilu_default::process_group_type;
1715   using boost::graph::ilu_permuted;
1716 
1717   PetscTruth      row_identity, col_identity;
1718   PetscContainer  c;
1719   PetscInt        m, n, M, N;
1720   PetscErrorCode  ierr;
1721 
1722   PetscFunctionBegin;
1723   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1724   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1725   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1726   if (!row_identity || !col_identity) {
1727     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1728   }
1729 
1730   process_group_type pg;
1731   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1732   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1733   lgraph_type&   level_graph = *lgraph_p;
1734   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1735 
1736   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1737   ilu_permuted(level_graph);
1738 
1739   /* put together the new matrix */
1740   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1741   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1742   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1743   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1744   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1745   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1746   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1747   (*fact)->factor = FACTOR_LU;
1748 
1749   ierr = PetscContainerCreate(A->comm, &c);
1750   ierr = PetscContainerSetPointer(c, lgraph_p);
1751   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1752   PetscFunctionReturn(0);
1753 }
1754 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
/*
   Intentional no-op for the PBGL path: MatILUFactorSymbolic_MPIAIJ() above
   already performs the full factorization (it calls ilu_permuted()), so no
   separate numeric stage is required here.
*/
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
1762 
1763 #undef __FUNCT__
1764 #define __FUNCT__ "MatSolve_MPIAIJ"
1765 /*
1766   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1767 */
1768 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1769 {
1770   namespace graph_dist = boost::graph::distributed;
1771 
1772   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1773   lgraph_type*   lgraph_p;
1774   PetscContainer c;
1775   PetscErrorCode ierr;
1776 
1777   PetscFunctionBegin;
1778   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1779   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1780   ierr = VecCopy(b, x); CHKERRQ(ierr);
1781 
1782   PetscScalar* array_x;
1783   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1784   PetscInt sx;
1785   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1786 
1787   PetscScalar* array_b;
1788   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1789   PetscInt sb;
1790   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1791 
1792   lgraph_type&   level_graph = *lgraph_p;
1793   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1794 
1795   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1796   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1797                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1798 
1799   typedef boost::iterator_property_map<array_ref_type::iterator,
1800                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1801   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1802                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1803 
1804   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1805 
1806   PetscFunctionReturn(0);
1807 }
1808 #endif
1809 
typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;                /* local nonzero count; number of sends and receives */
  PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j; /* destination ranks; send sizes; send/recv column-index buffers */
  PetscScalar    *sbuf_a,**rbuf_a;                     /* send/recv value buffers */
  PetscErrorCode (*MatDestroy)(Mat);                   /* original destroy routine, restored by MatDestroy_MatRedundant() */
} Mat_Redundant;
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1819 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1820 {
1821   PetscErrorCode       ierr;
1822   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1823   PetscInt             i;
1824 
1825   PetscFunctionBegin;
1826   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1827   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1828   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1829   for (i=0; i<redund->nrecvs; i++){
1830     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1831     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1832   }
1833   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1834   ierr = PetscFree(redund);CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatDestroy_MatRedundant"
1840 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1841 {
1842   PetscErrorCode  ierr;
1843   PetscContainer  container;
1844   Mat_Redundant   *redund=PETSC_NULL;
1845 
1846   PetscFunctionBegin;
1847   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1848   if (container) {
1849     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1850   } else {
1851     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1852   }
1853   A->ops->destroy = redund->MatDestroy;
1854   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1855   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1856   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1857   PetscFunctionReturn(0);
1858 }
1859 
#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
/*
   Builds, on each subcommunicator, a matrix C that redundantly stores all rows
   of the parallel matrix mat.  Each process packs its local rows CSR-style
   (row pointers + column indices in sbuf_j, values in sbuf_a), exchanges them
   with the processes of the other subcommunicators that share its subrank,
   and inserts both its own and the received rows into C.

   reuse == MAT_INITIAL_MATRIX: all communication buffers are built and attached
   to C in a Mat_Redundant container so a later call with MAT_REUSE_MATRIX only
   needs to resend the numerical values (tags/indices are reused).
*/
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=mat->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap.range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  PetscScalar    *vals,*aworkA,*aworkB;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    /* sanity-check the provided matrix, then recover the cached buffers */
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_PLIB,"Container does not exit"); /* NOTE(review): typo, should read "exist" */
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");

    nsends    = redund->nsends;
    nrecvs    = redund->nrecvs;
    send_rank = redund->send_rank; recv_rank = send_rank + size;
    sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
    sbuf_j    = redund->sbuf_j;
    sbuf_a    = redund->sbuf_a;
    rbuf_j    = redund->rbuf_j;
    rbuf_a    = redund->rbuf_a;
  }

  if (reuse == MAT_INITIAL_MATRIX){
    PetscMPIInt  subrank,subsize;
    PetscInt     nleftover,np_subcomm;
    /* get the destination processors' id send_rank, nsends and nrecvs */
    ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
    ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank); /* NOTE(review): return code not checked (missing CHKERRQ) */
    recv_rank = send_rank + size;
    np_subcomm = size/nsubcomm;
    nleftover  = size - nsubcomm*np_subcomm;
    nsends = 0; nrecvs = 0;
    for (i=0; i<size; i++){ /* i=rank*/
      if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
        send_rank[nsends] = i; nsends++;
        recv_rank[nrecvs++] = i;
      }
    }
    if (rank >= size - nleftover){/* this proc is a leftover processor */
      i = size-nleftover-1;
      j = 0;
      while (j < nsubcomm - nleftover){
        send_rank[nsends++] = i;
        i--; j++;
      }
    }

    if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
      for (i=0; i<nleftover; i++){
        recv_rank[nrecvs++] = size-nleftover+i;
      }
    }

    /* allocate sbuf_j, sbuf_a: nzlocal column indices + (local rows + 1) row pointers */
    i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
    ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
    ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* copy mat's local entries into the buffers */
  if (reuse == MAT_INITIAL_MATRIX){
    rownz_max = 0;
    rptr = sbuf_j;
    cols = sbuf_j + rend-rstart + 1;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols  = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      /* load the column indices for this row into cols:
         off-diagonal entries left of the diagonal block, then the diagonal
         block, then off-diagonal entries right of it -- global column order */
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart){
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      for (l=0; l<nzA; l++){
        vals[lwrite]   = aworkA[l];
        cols[lwrite++] = cstart + cworkA[l];
      }
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend){
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      vals += ncols;
      cols += ncols;
      rptr[i+1] = rptr[i] + ncols;
      if (rownz_max < ncols) rownz_max = ncols;
    }
    /* NOTE(review): the message prints rptr[rend-rstart+1] while the check
       tests rptr[rend-rstart]; the printed value looks off by one -- confirm */
    if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
  } else { /* only copy matrix values into sbuf_a */
    rptr = sbuf_j;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols  = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      lwrite = 0;
      /* same three-way split as above, values only (column layout is cached) */
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
      }
      for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* send nzlocal to others, and recv other's nzlocal */
  /*--------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* three rounds of sends/recvs (sizes, indices, values) share one request array */
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    /* value round only */
    ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX){
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
    rbuf_nz = sbuf_nz + nsends;

    /* post receives of other's nzlocal */
    for (i=0; i<nrecvs; i++){
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++){
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
      rbuf_nz[imdex] += i + 2;
      ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and recv from other's */
    /*------------------------------------------------*/
    for (i=0; i<nsends; i++){
      j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++){
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++){
      j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++){
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
    ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    /* both preallocations are called; only the one matching C's type takes effect */
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++){
    row   = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++){
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    rptr = rbuf_j[imdex];
    cols = rbuf_j[imdex] + rend-rstart + 1;
    vals = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++){
      row   = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
  if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
  if (reuse == MAT_INITIAL_MATRIX){
    PetscContainer container;
    *matredundant = C;
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);

    redund->nzlocal = nzlocal;
    redund->nsends  = nsends;
    redund->nrecvs  = nrecvs;
    redund->send_rank = send_rank;
    redund->sbuf_nz = sbuf_nz;
    redund->sbuf_j  = sbuf_j;
    redund->sbuf_a  = sbuf_a;
    redund->rbuf_j  = rbuf_j;
    redund->rbuf_a  = rbuf_a;

    /* hook our destroy in front of the type's destroy so the cache is freed */
    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}
2179 
2180 /* -------------------------------------------------------------------*/
/* Function dispatch table for the MPIAIJ matrix type; slot positions follow
   the _MatOps struct layout (numbered comments mark every fifth slot).
   Zero entries mean the operation is not supported by this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       MatLUFactorNumeric_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatILUFactorSymbolic_MPIAIJ,
#else
       0,
#endif
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ 0,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*90*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*95*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*100*/0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*105*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*110*/0,
       MatGetRedundantMatrix_MPIAIJ};
2309 
2310 /* ----------------------------------------------------------------------------------------*/
2311 
2312 EXTERN_C_BEGIN
2313 #undef __FUNCT__
2314 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2315 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2316 {
2317   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2318   PetscErrorCode ierr;
2319 
2320   PetscFunctionBegin;
2321   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2322   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2323   PetscFunctionReturn(0);
2324 }
2325 EXTERN_C_END
2326 
2327 EXTERN_C_BEGIN
2328 #undef __FUNCT__
2329 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2330 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2331 {
2332   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2333   PetscErrorCode ierr;
2334 
2335   PetscFunctionBegin;
2336   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2337   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 EXTERN_C_END
2341 
2342 #include "petscpc.h"
2343 EXTERN_C_BEGIN
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2346 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2347 {
2348   Mat_MPIAIJ     *b;
2349   PetscErrorCode ierr;
2350   PetscInt       i;
2351 
2352   PetscFunctionBegin;
2353   B->preallocated = PETSC_TRUE;
2354   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2355   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2356   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2357   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2358 
2359   B->rmap.bs = B->cmap.bs = 1;
2360   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2361   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2362   if (d_nnz) {
2363     for (i=0; i<B->rmap.n; i++) {
2364       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2365     }
2366   }
2367   if (o_nnz) {
2368     for (i=0; i<B->rmap.n; i++) {
2369       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2370     }
2371   }
2372   b = (Mat_MPIAIJ*)B->data;
2373 
2374   /* Explicitly create 2 MATSEQAIJ matrices. */
2375   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2376   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2377   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2378   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2379   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2380   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2381   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2382   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2383 
2384   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2385   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2386 
2387   PetscFunctionReturn(0);
2388 }
2389 EXTERN_C_END
2390 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Duplicates a parallel AIJ matrix: creates a new matrix with the same type,
   layout, and internal state as matin, then duplicates (or, depending on
   cpvalues, merely allocates) the diagonal block A, the off-diagonal block B,
   the column map, the global-column array, and the scatter context.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  /* copy the full operations table so any per-instance overrides carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  /* the duplicate starts out assembled, with no pending stashed values */
  mat->factor       = matin->factor;
  mat->rmap.bs      = matin->rmap.bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  a->donotstash     = oldmat->donotstash;
  a->roworiented    = oldmat->roworiented;
  /* MatGetRow() scratch state is deliberately NOT copied; it is rebuilt lazily */
  a->rowindices     = 0;
  a->rowvalues      = 0;
  a->getrowactive   = PETSC_FALSE;

  ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  /* colmap: global column number -> local index in the off-diagonal block.
     Stored as a hash table with PETSC_USE_CTABLE, else as a dense integer array. */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* garray: global column numbers of the off-diagonal block's local columns */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap.n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the communication machinery and the two sequential blocks */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2454 
2455 #include "petscsys.h"
2456 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   Loads a parallel AIJ matrix from a PETSc binary viewer.

   Process 0 reads the header, the per-row lengths, the column indices, and
   the numerical values from the file and ships each remote process its share
   via point-to-point messages; every process then assembles its local rows
   with MatSetValues_MPIAIJ().
*/
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
  PetscInt       i,nz,j,rstart,rend,mmax;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only rank 0 touches the file; fd stays unused elsewhere */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    /* header = {cookie, M, N, ...}; header[3] is read but not used here
       (presumably the total nonzero count -- confirm against the binary format) */
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: earlier ranks take the extra rows,
     so m is non-increasing with rank */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  /* after this, rowners[1..size] hold each rank's local row count */
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax       = rowners[1];
    /* NOTE(review): this scan stops at rowners[size-1] and never looks at
       rowners[size]; that is safe only because the distribution above gives
       rank 0 (rowners[1]) the largest count -- confirm if the partitioning
       formula ever changes */
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  /* prefix-sum the counts so rowners[] becomes the row-ownership ranges */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    /* rank 0 keeps its own lengths in ourlens, then reads and forwards
       each other rank's lengths while accumulating per-rank nonzero totals */
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    /* sanity check: the message must contain exactly the expected count */
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    /* square matrix: column ownership mirrors row ownership */
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix; ourlens is temporarily reduced to the diagonal-only
     counts for preallocation, then restored to full row lengths below */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* the file stores each row's columns sorted, so inserts can take the fast path */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
2651 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Extracts a parallel submatrix given row/column index sets.

    Not great since it makes two copies of the submatrix: first a SeqAIJ
  copy of the local part (via MatGetSubMatrices()), then the final parallel
  matrix built by concatenating the local copies. Writing it directly would
  be much like MatGetSubMatrices_MPIAIJ().

    With MAT_REUSE_MATRIX, the SeqAIJ copy saved on the previous call (as the
  "SubMatrix" composed object) is refilled instead of recreated, and *newmat
  must be the matrix returned by the earlier MAT_INITIAL_MATRIX call.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  PetscScalar    *vwork,*aa;
  MPI_Comm       comm = mat->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* reuse the sequential submatrix stashed on the previous result */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    /* only the array wrapper is freed here; Mreuse itself is kept alive */
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
	nlocal = m;
      } else {
        /* rectangular: split the columns evenly, extras to the lower ranks */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this rank's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* walk the sequential submatrix's CSR arrays and insert row by row */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    /* drop our reference; the composed object keeps Mreuse alive */
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
2778 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
/*
   Type-specific implementation of MatMPIAIJSetPreallocationCSR(): derives the
   per-row diagonal/off-diagonal nonzero counts from the local CSR arrays
   (Ii, J), preallocates, and then copies the values into the matrix.
   J must be sorted within each row (MAT_COLUMNS_SORTED is set during the
   inserts), and Ii[] holds local row offsets starting at 0.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  /* finalize the layouts so the local sizes and ownership ranges are valid */
  B->rmap.bs = B->cmap.bs = 1;
  ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
  m      = B->rmap.n;
  cstart = B->cmap.rstart;
  cend   = B->cmap.rend;
  rstart = B->rmap.rstart;

  /* single allocation: d_nnz is the first half, o_nnz the second */
  ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    /* columns are sorted: skip entries left of the diagonal block ... */
    for (j=0; j<nnz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    /* ... then count entries inside [cstart,cend); the rest are off-diagonal */
    d = 0;
    for (; j<nnz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);

  /* with no values supplied, insert zeros (one scratch row of max length) */
  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1]- Ii[i];
    /* when v is NULL every row reuses the shared zero buffer (offset 0) */
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
2846 
2847 #undef __FUNCT__
2848 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2849 /*@
2850    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2851    (the default parallel PETSc format).
2852 
2853    Collective on MPI_Comm
2854 
2855    Input Parameters:
2856 +  B - the matrix
2857 .  i - the indices into j for the start of each local row (starts with zero)
2858 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2859 -  v - optional values in the matrix
2860 
2861    Level: developer
2862 
   Notes: this actually copies the values from i[], j[], and v[] to put them into PETSc's internal
     storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2865 
2866 .keywords: matrix, aij, compressed row, sparse, parallel
2867 
2868 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2869           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2870 @*/
2871 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2872 {
2873   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2874 
2875   PetscFunctionBegin;
2876   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2877   if (f) {
2878     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2879   }
2880   PetscFunctionReturn(0);
2881 }
2882 
2883 #undef __FUNCT__
2884 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2885 /*@C
2886    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2887    (the default parallel PETSc format).  For good matrix assembly performance
2888    the user should preallocate the matrix storage by setting the parameters
2889    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2890    performance can be increased by more than a factor of 50.
2891 
2892    Collective on MPI_Comm
2893 
2894    Input Parameters:
2895 +  A - the matrix
2896 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2897            (same value is used for all local rows)
2898 .  d_nnz - array containing the number of nonzeros in the various rows of the
2899            DIAGONAL portion of the local submatrix (possibly different for each row)
2900            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2901            The size of this array is equal to the number of local rows, i.e 'm'.
2902            You must leave room for the diagonal entry even if it is zero.
2903 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2904            submatrix (same value is used for all local rows).
2905 -  o_nnz - array containing the number of nonzeros in the various rows of the
2906            OFF-DIAGONAL portion of the local submatrix (possibly different for
2907            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2908            structure. The size of this array is equal to the number
2909            of local rows, i.e 'm'.
2910 
2911    If the *_nnz parameter is given then the *_nz parameter is ignored
2912 
2913    The AIJ format (also called the Yale sparse matrix format or
2914    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2915    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2916 
2917    The parallel matrix is partitioned such that the first m0 rows belong to
2918    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2919    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2920 
2921    The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
2923    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2924    first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
2926    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2927 
2928    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2929 
2930    Example usage:
2931 
2932    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2934    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2935    as follows:
2936 
2937 .vb
2938             1  2  0  |  0  3  0  |  0  4
2939     Proc0   0  5  6  |  7  0  0  |  8  0
2940             9  0 10  | 11  0  0  | 12  0
2941     -------------------------------------
2942            13  0 14  | 15 16 17  |  0  0
2943     Proc1   0 18  0  | 19 20 21  |  0  0
2944             0  0  0  | 22 23  0  | 24  0
2945     -------------------------------------
2946     Proc2  25 26 27  |  0  0 28  | 29  0
2947            30  0  0  | 31 32 33  |  0 34
2948 .ve
2949 
2950    This can be represented as a collection of submatrices as:
2951 
2952 .vb
2953       A B C
2954       D E F
2955       G H I
2956 .ve
2957 
2958    Where the submatrices A,B,C are owned by proc0, D,E,F are
2959    owned by proc1, G,H,I are owned by proc2.
2960 
2961    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2962    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2963    The 'M','N' parameters are 8,8, and have the same values on all procs.
2964 
2965    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2966    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2967    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2968    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2971 
2972    When d_nz, o_nz parameters are specified, d_nz storage elements are
2973    allocated for every row of the local diagonal submatrix, and o_nz
2974    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2977    In this case, the values of d_nz,o_nz are:
2978 .vb
2979      proc0 : dnz = 2, o_nz = 2
2980      proc1 : dnz = 3, o_nz = 2
2981      proc2 : dnz = 1, o_nz = 4
2982 .ve
2983    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2984    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
2986    34 values.
2987 
2988    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2990    In the above case the values for d_nnz,o_nnz are:
2991 .vb
2992      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2993      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2994      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2995 .ve
2996    Here the space allocated is sum of all the above values i.e 34, and
2997    hence pre-allocation is perfect.
2998 
2999    Level: intermediate
3000 
3001 .keywords: matrix, aij, compressed row, sparse, parallel
3002 
3003 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3004           MPIAIJ
3005 @*/
3006 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3007 {
3008   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3009 
3010   PetscFunctionBegin;
3011   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3012   if (f) {
3013     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3014   }
3015   PetscFunctionReturn(0);
3016 }
3017 
3018 #undef __FUNCT__
3019 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3020 /*@C
3021      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3022          CSR format the local rows.
3023 
3024    Collective on MPI_Comm
3025 
3026    Input Parameters:
3027 +  comm - MPI communicator
3028 .  m - number of local rows (Cannot be PETSC_DECIDE)
3029 .  n - This value should be the same as the local size used in creating the
3030        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3031        calculated if N is given) For square matrices n is almost always m.
3032 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3033 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3034 .   i - row indices
3035 .   j - column indices
3036 -   a - matrix values
3037 
3038    Output Parameter:
3039 .   mat - the matrix
3040 
3041    Level: intermediate
3042 
3043    Notes:
3044        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3045      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3046      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3047 
3048        The i and j indices are 0 based
3049 
3050 .keywords: matrix, aij, compressed row, sparse, parallel
3051 
3052 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3053           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3054 @*/
3055 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3056 {
3057   PetscErrorCode ierr;
3058 
3059  PetscFunctionBegin;
3060   if (i[0]) {
3061     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3062   }
3063   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3064   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3065   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3066   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3067   PetscFunctionReturn(0);
3068 }
3069 
3070 #undef __FUNCT__
3071 #define __FUNCT__ "MatCreateMPIAIJ"
3072 /*@C
3073    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3074    (the default parallel PETSc format).  For good matrix assembly performance
3075    the user should preallocate the matrix storage by setting the parameters
3076    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3077    performance can be increased by more than a factor of 50.
3078 
3079    Collective on MPI_Comm
3080 
3081    Input Parameters:
3082 +  comm - MPI communicator
3083 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3084            This value should be the same as the local size used in creating the
3085            y vector for the matrix-vector product y = Ax.
3086 .  n - This value should be the same as the local size used in creating the
3087        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3088        calculated if N is given) For square matrices n is almost always m.
3089 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3090 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3091 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3092            (same value is used for all local rows)
3093 .  d_nnz - array containing the number of nonzeros in the various rows of the
3094            DIAGONAL portion of the local submatrix (possibly different for each row)
3095            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3096            The size of this array is equal to the number of local rows, i.e 'm'.
3097            You must leave room for the diagonal entry even if it is zero.
3098 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3099            submatrix (same value is used for all local rows).
3100 -  o_nnz - array containing the number of nonzeros in the various rows of the
3101            OFF-DIAGONAL portion of the local submatrix (possibly different for
3102            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3103            structure. The size of this array is equal to the number
3104            of local rows, i.e 'm'.
3105 
3106    Output Parameter:
3107 .  A - the matrix
3108 
3109    Notes:
3110    If the *_nnz parameter is given then the *_nz parameter is ignored
3111 
3112    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3113    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3114    storage requirements for this matrix.
3115 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.
3119 
3120    The user MUST specify either the local or global matrix dimensions
3121    (possibly both).
3122 
3123    The parallel matrix is partitioned such that the first m0 rows belong to
3124    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3125    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3126 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3133 
3134    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3135 
3136    When calling this routine with a single process communicator, a matrix of
3137    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3138    type of communicator, use the construction mechanism:
3139      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3140 
3141    By default, this format uses inodes (identical nodes) when possible.
3142    We search for consecutive rows with the same nonzero structure, thereby
3143    reusing matrix information to achieve increased efficiency.
3144 
3145    Options Database Keys:
3146 +  -mat_no_inode  - Do not use inodes
3147 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3148 -  -mat_aij_oneindex - Internally use indexing starting at 1
3149         rather than 0.  Note that when calling MatSetValues(),
3150         the user still MUST index entries starting at 0!
3151 
3152 
3153    Example usage:
3154 
3155    Consider the following 8x8 matrix with 34 non-zero values, that is
3156    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3157    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3158    as follows:
3159 
3160 .vb
3161             1  2  0  |  0  3  0  |  0  4
3162     Proc0   0  5  6  |  7  0  0  |  8  0
3163             9  0 10  | 11  0  0  | 12  0
3164     -------------------------------------
3165            13  0 14  | 15 16 17  |  0  0
3166     Proc1   0 18  0  | 19 20 21  |  0  0
3167             0  0  0  | 22 23  0  | 24  0
3168     -------------------------------------
3169     Proc2  25 26 27  |  0  0 28  | 29  0
3170            30  0  0  | 31 32 33  |  0 34
3171 .ve
3172 
3173    This can be represented as a collection of submatrices as:
3174 
3175 .vb
3176       A B C
3177       D E F
3178       G H I
3179 .ve
3180 
3181    Where the submatrices A,B,C are owned by proc0, D,E,F are
3182    owned by proc1, G,H,I are owned by proc2.
3183 
3184    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3185    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3186    The 'M','N' parameters are 8,8, and have the same values on all procs.
3187 
3188    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3189    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3190    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3194 
3195    When d_nz, o_nz parameters are specified, d_nz storage elements are
3196    allocated for every row of the local diagonal submatrix, and o_nz
3197    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3200    In this case, the values of d_nz,o_nz are:
3201 .vb
3202      proc0 : dnz = 2, o_nz = 2
3203      proc1 : dnz = 3, o_nz = 2
3204      proc2 : dnz = 1, o_nz = 4
3205 .ve
3206    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3207    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.
3210 
3211    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3213    In the above case the values for d_nnz,o_nnz are:
3214 .vb
3215      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3216      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3217      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3218 .ve
3219    Here the space allocated is sum of all the above values i.e 34, and
3220    hence pre-allocation is perfect.
3221 
3222    Level: intermediate
3223 
3224 .keywords: matrix, aij, compressed row, sparse, parallel
3225 
3226 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3227           MPIAIJ, MatCreateMPIAIJWithArrays()
3228 @*/
3229 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3230 {
3231   PetscErrorCode ierr;
3232   PetscMPIInt    size;
3233 
3234   PetscFunctionBegin;
3235   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3236   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3237   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3238   if (size > 1) {
3239     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3240     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3241   } else {
3242     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3243     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3244   }
3245   PetscFunctionReturn(0);
3246 }
3247 
3248 #undef __FUNCT__
3249 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3250 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3251 {
3252   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3253 
3254   PetscFunctionBegin;
3255   *Ad     = a->A;
3256   *Ao     = a->B;
3257   *colmap = a->garray;
3258   PetscFunctionReturn(0);
3259 }
3260 
3261 #undef __FUNCT__
3262 #define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - Sets a coloring on both sequential blocks (diagonal
   A and off-diagonal B) of an MPIAIJ matrix.

   For IS_COLORING_GLOBAL the colors of all columns are gathered onto every
   process so that the off-diagonal block can look up colors by global column
   number (via garray).  For IS_COLORING_GHOSTED the supplied colors are
   indexed through the matrix's global-to-local mapping instead.
   Any other coloring type is an error.
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    /* gather the colors of every global column onto this process */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    /* garray[i] is the global column of local off-diagonal column i */
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* ISColoringCreate takes ownership of colors; no free here */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    /* translate this process's global row/column range to local indices */
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      larray[i] = i + A->cmap.rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    /* off-diagonal columns are already global (garray); map them to local */
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}
3324 
3325 #if defined(PETSC_HAVE_ADIC)
3326 #undef __FUNCT__
3327 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3328 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3329 {
3330   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3331   PetscErrorCode ierr;
3332 
3333   PetscFunctionBegin;
3334   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3335   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3336   PetscFunctionReturn(0);
3337 }
3338 #endif
3339 
3340 #undef __FUNCT__
3341 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3342 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3343 {
3344   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3345   PetscErrorCode ierr;
3346 
3347   PetscFunctionBegin;
3348   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3349   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3350   PetscFunctionReturn(0);
3351 }
3352 
3353 #undef __FUNCT__
3354 #define __FUNCT__ "MatMerge"
3355 /*@C
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
                 matrices from each processor
3358 
3359     Collective on MPI_Comm
3360 
3361    Input Parameters:
3362 +    comm - the communicators the parallel matrix will live on
3363 .    inmat - the input sequential matrices
3364 .    n - number of local columns (or PETSC_DECIDE)
3365 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3366 
3367    Output Parameter:
3368 .    outmat - the parallel matrix generated
3369 
3370     Level: advanced
3371 
3372    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3373 
3374 @*/
/*
   Implementation notes:
   - The output is always of type MPIAIJ regardless of the input type.
   - NOTE(review): inmat is destroyed at the end of this routine — callers
     must not use it afterwards; this transfer of ownership is not stated in
     the manual page above. Confirm against callers before changing.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  /* m = rows of the (sequential) input = local rows of output; N = global cols */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* prefix-sum of local row counts gives this process's first global row */
    ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      /* only the column indices are needed for counting; values are skipped */
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    /* structure already exists; just recover this process's row offset */
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* copy each local row of inmat into global row i+rstart of the output */
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii    = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  /* the input matrix is consumed here */
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
3423 
3424 #undef __FUNCT__
3425 #define __FUNCT__ "MatFileSplit"
3426 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3427 {
3428   PetscErrorCode    ierr;
3429   PetscMPIInt       rank;
3430   PetscInt          m,N,i,rstart,nnz;
3431   size_t            len;
3432   const PetscInt    *indx;
3433   PetscViewer       out;
3434   char              *name;
3435   Mat               B;
3436   const PetscScalar *values;
3437 
3438   PetscFunctionBegin;
3439   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3440   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3441   /* Should this be the type of the diagonal block of A? */
3442   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3443   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3444   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3445   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3446   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3447   for (i=0;i<m;i++) {
3448     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3449     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3450     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3451   }
3452   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3453   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3454 
3455   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3456   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3457   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3458   sprintf(name,"%s.%d",outfile,rank);
3459   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3460   ierr = PetscFree(name);
3461   ierr = MatView(B,out);CHKERRQ(ierr);
3462   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3463   ierr = MatDestroy(B);CHKERRQ(ierr);
3464   PetscFunctionReturn(0);
3465 }
3466 
3467 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3468 #undef __FUNCT__
3469 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3470 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3471 {
3472   PetscErrorCode       ierr;
3473   Mat_Merge_SeqsToMPI  *merge;
3474   PetscContainer       container;
3475 
3476   PetscFunctionBegin;
3477   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3478   if (container) {
3479     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3480     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3481     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3482     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3483     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3484     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3485     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3486     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3487     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3488     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3489     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3490     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3491 
3492     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3493     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3494   }
3495   ierr = PetscFree(merge);CHKERRQ(ierr);
3496 
3497   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3498   PetscFunctionReturn(0);
3499 }
3500 
3501 #include "src/mat/utils/freespace.h"
3502 #include "petscbt.h"
3503 static PetscEvent logkey_seqstompinum = 0;
3504 #undef __FUNCT__
3505 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3506 /*@C
3507       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3508                  matrices from each processor
3509 
3510     Collective on MPI_Comm
3511 
3512    Input Parameters:
3513 +    comm - the communicators the parallel matrix will live on
3514 .    seqmat - the input sequential matrices
3515 .    m - number of local rows (or PETSC_DECIDE)
3516 .    n - number of local columns (or PETSC_DECIDE)
3517 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3518 
3519    Output Parameter:
3520 .    mpimat - the parallel matrix generated
3521 
3522     Level: advanced
3523 
3524    Notes:
3525      The dimensions of the sequential matrix in each processor MUST be the same.
3526      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3527      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3528 @*/
3529 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3530 {
3531   PetscErrorCode       ierr;
3532   MPI_Comm             comm=mpimat->comm;
3533   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3534   PetscMPIInt          size,rank,taga,*len_s;
3535   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3536   PetscInt             proc,m;
3537   PetscInt             **buf_ri,**buf_rj;
3538   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3539   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3540   MPI_Request          *s_waits,*r_waits;
3541   MPI_Status           *status;
3542   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3543   Mat_Merge_SeqsToMPI  *merge;
3544   PetscContainer       container;
3545 
3546   PetscFunctionBegin;
3547   if (!logkey_seqstompinum) {
3548     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3549   }
3550   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3551 
3552   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3553   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3554 
3555   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3556   if (container) {
3557     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3558   }
3559   bi     = merge->bi;
3560   bj     = merge->bj;
3561   buf_ri = merge->buf_ri;
3562   buf_rj = merge->buf_rj;
3563 
3564   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3565   owners = merge->rowmap.range;
3566   len_s  = merge->len_s;
3567 
3568   /* send and recv matrix values */
3569   /*-----------------------------*/
3570   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3571   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3572 
3573   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3574   for (proc=0,k=0; proc<size; proc++){
3575     if (!len_s[proc]) continue;
3576     i = owners[proc];
3577     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3578     k++;
3579   }
3580 
3581   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3582   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3583   ierr = PetscFree(status);CHKERRQ(ierr);
3584 
3585   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3586   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3587 
3588   /* insert mat values of mpimat */
3589   /*----------------------------*/
3590   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3591   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3592   nextrow = buf_ri_k + merge->nrecv;
3593   nextai  = nextrow + merge->nrecv;
3594 
3595   for (k=0; k<merge->nrecv; k++){
3596     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3597     nrows = *(buf_ri_k[k]);
3598     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3599     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3600   }
3601 
3602   /* set values of ba */
3603   m = merge->rowmap.n;
3604   for (i=0; i<m; i++) {
3605     arow = owners[rank] + i;
3606     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3607     bnzi = bi[i+1] - bi[i];
3608     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3609 
3610     /* add local non-zero vals of this proc's seqmat into ba */
3611     anzi = ai[arow+1] - ai[arow];
3612     aj   = a->j + ai[arow];
3613     aa   = a->a + ai[arow];
3614     nextaj = 0;
3615     for (j=0; nextaj<anzi; j++){
3616       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3617         ba_i[j] += aa[nextaj++];
3618       }
3619     }
3620 
3621     /* add received vals into ba */
3622     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3623       /* i-th row */
3624       if (i == *nextrow[k]) {
3625         anzi = *(nextai[k]+1) - *nextai[k];
3626         aj   = buf_rj[k] + *(nextai[k]);
3627         aa   = abuf_r[k] + *(nextai[k]);
3628         nextaj = 0;
3629         for (j=0; nextaj<anzi; j++){
3630           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3631             ba_i[j] += aa[nextaj++];
3632           }
3633         }
3634         nextrow[k]++; nextai[k]++;
3635       }
3636     }
3637     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3638   }
3639   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3640   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3641 
3642   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3643   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3644   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3645   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3646   PetscFunctionReturn(0);
3647 }
3648 
3649 static PetscEvent logkey_seqstompisym = 0;
3650 #undef __FUNCT__
3651 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3652 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3653 {
3654   PetscErrorCode       ierr;
3655   Mat                  B_mpi;
3656   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3657   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3658   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3659   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3660   PetscInt             len,proc,*dnz,*onz;
3661   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3662   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3663   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3664   MPI_Status           *status;
3665   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3666   PetscBT              lnkbt;
3667   Mat_Merge_SeqsToMPI  *merge;
3668   PetscContainer       container;
3669 
3670   PetscFunctionBegin;
3671   if (!logkey_seqstompisym) {
3672     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3673   }
3674   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3675 
3676   /* make sure it is a PETSc comm */
3677   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3678   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3679   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3680 
3681   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3682   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3683 
3684   /* determine row ownership */
3685   /*---------------------------------------------------------*/
3686   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3687   merge->rowmap.n = m;
3688   merge->rowmap.N = M;
3689   merge->rowmap.bs = 1;
3690   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3691   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3692   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3693 
3694   m      = merge->rowmap.n;
3695   M      = merge->rowmap.N;
3696   owners = merge->rowmap.range;
3697 
3698   /* determine the number of messages to send, their lengths */
3699   /*---------------------------------------------------------*/
3700   len_s  = merge->len_s;
3701 
3702   len = 0;  /* length of buf_si[] */
3703   merge->nsend = 0;
3704   for (proc=0; proc<size; proc++){
3705     len_si[proc] = 0;
3706     if (proc == rank){
3707       len_s[proc] = 0;
3708     } else {
3709       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3710       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3711     }
3712     if (len_s[proc]) {
3713       merge->nsend++;
3714       nrows = 0;
3715       for (i=owners[proc]; i<owners[proc+1]; i++){
3716         if (ai[i+1] > ai[i]) nrows++;
3717       }
3718       len_si[proc] = 2*(nrows+1);
3719       len += len_si[proc];
3720     }
3721   }
3722 
3723   /* determine the number and length of messages to receive for ij-structure */
3724   /*-------------------------------------------------------------------------*/
3725   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3726   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3727 
3728   /* post the Irecv of j-structure */
3729   /*-------------------------------*/
3730   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3731   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3732 
3733   /* post the Isend of j-structure */
3734   /*--------------------------------*/
3735   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3736   sj_waits = si_waits + merge->nsend;
3737 
3738   for (proc=0, k=0; proc<size; proc++){
3739     if (!len_s[proc]) continue;
3740     i = owners[proc];
3741     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3742     k++;
3743   }
3744 
3745   /* receives and sends of j-structure are complete */
3746   /*------------------------------------------------*/
3747   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3748   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3749 
3750   /* send and recv i-structure */
3751   /*---------------------------*/
3752   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3753   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3754 
3755   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3756   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3757   for (proc=0,k=0; proc<size; proc++){
3758     if (!len_s[proc]) continue;
3759     /* form outgoing message for i-structure:
3760          buf_si[0]:                 nrows to be sent
3761                [1:nrows]:           row index (global)
3762                [nrows+1:2*nrows+1]: i-structure index
3763     */
3764     /*-------------------------------------------*/
3765     nrows = len_si[proc]/2 - 1;
3766     buf_si_i    = buf_si + nrows+1;
3767     buf_si[0]   = nrows;
3768     buf_si_i[0] = 0;
3769     nrows = 0;
3770     for (i=owners[proc]; i<owners[proc+1]; i++){
3771       anzi = ai[i+1] - ai[i];
3772       if (anzi) {
3773         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3774         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3775         nrows++;
3776       }
3777     }
3778     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3779     k++;
3780     buf_si += len_si[proc];
3781   }
3782 
3783   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3784   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3785 
3786   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3787   for (i=0; i<merge->nrecv; i++){
3788     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3789   }
3790 
3791   ierr = PetscFree(len_si);CHKERRQ(ierr);
3792   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3793   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3794   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3795   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3796   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3797   ierr = PetscFree(status);CHKERRQ(ierr);
3798 
3799   /* compute a local seq matrix in each processor */
3800   /*----------------------------------------------*/
3801   /* allocate bi array and free space for accumulating nonzero column info */
3802   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3803   bi[0] = 0;
3804 
3805   /* create and initialize a linked list */
3806   nlnk = N+1;
3807   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3808 
3809   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3810   len = 0;
3811   len  = ai[owners[rank+1]] - ai[owners[rank]];
3812   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3813   current_space = free_space;
3814 
3815   /* determine symbolic info for each local row */
3816   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3817   nextrow = buf_ri_k + merge->nrecv;
3818   nextai  = nextrow + merge->nrecv;
3819   for (k=0; k<merge->nrecv; k++){
3820     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3821     nrows = *buf_ri_k[k];
3822     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3823     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3824   }
3825 
3826   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3827   len = 0;
3828   for (i=0;i<m;i++) {
3829     bnzi   = 0;
3830     /* add local non-zero cols of this proc's seqmat into lnk */
3831     arow   = owners[rank] + i;
3832     anzi   = ai[arow+1] - ai[arow];
3833     aj     = a->j + ai[arow];
3834     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3835     bnzi += nlnk;
3836     /* add received col data into lnk */
3837     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3838       if (i == *nextrow[k]) { /* i-th row */
3839         anzi = *(nextai[k]+1) - *nextai[k];
3840         aj   = buf_rj[k] + *nextai[k];
3841         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3842         bnzi += nlnk;
3843         nextrow[k]++; nextai[k]++;
3844       }
3845     }
3846     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3847 
3848     /* if free space is not available, make more free space */
3849     if (current_space->local_remaining<bnzi) {
3850       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3851       nspacedouble++;
3852     }
3853     /* copy data into free space, then initialize lnk */
3854     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3855     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3856 
3857     current_space->array           += bnzi;
3858     current_space->local_used      += bnzi;
3859     current_space->local_remaining -= bnzi;
3860 
3861     bi[i+1] = bi[i] + bnzi;
3862   }
3863 
3864   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3865 
3866   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3867   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3868   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3869 
3870   /* create symbolic parallel matrix B_mpi */
3871   /*---------------------------------------*/
3872   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3873   if (n==PETSC_DECIDE) {
3874     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3875   } else {
3876     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3877   }
3878   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3879   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3880   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3881 
3882   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3883   B_mpi->assembled     = PETSC_FALSE;
3884   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3885   merge->bi            = bi;
3886   merge->bj            = bj;
3887   merge->buf_ri        = buf_ri;
3888   merge->buf_rj        = buf_rj;
3889   merge->coi           = PETSC_NULL;
3890   merge->coj           = PETSC_NULL;
3891   merge->owners_co     = PETSC_NULL;
3892 
3893   /* attach the supporting struct to B_mpi for reuse */
3894   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3895   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3896   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3897   *mpimat = B_mpi;
3898 
3899   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3900   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3901   PetscFunctionReturn(0);
3902 }
3903 
3904 static PetscEvent logkey_seqstompi = 0;
3905 #undef __FUNCT__
3906 #define __FUNCT__ "MatMerge_SeqsToMPI"
3907 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3908 {
3909   PetscErrorCode   ierr;
3910 
3911   PetscFunctionBegin;
3912   if (!logkey_seqstompi) {
3913     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3914   }
3915   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3916   if (scall == MAT_INITIAL_MATRIX){
3917     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3918   }
3919   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3920   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3921   PetscFunctionReturn(0);
3922 }
3923 static PetscEvent logkey_getlocalmat = 0;
3924 #undef __FUNCT__
3925 #define __FUNCT__ "MatGetLocalMat"
3926 /*@C
3927      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3928 
3929     Not Collective
3930 
3931    Input Parameters:
3932 +    A - the matrix
3933 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3934 
3935    Output Parameter:
3936 .    A_loc - the local sequential matrix generated
3937 
3938     Level: developer
3939 
3940 @*/
3941 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3942 {
3943   PetscErrorCode  ierr;
3944   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3945   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3946   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3947   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3948   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3949   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3950 
3951   PetscFunctionBegin;
3952   if (!logkey_getlocalmat) {
3953     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3954   }
3955   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3956   if (scall == MAT_INITIAL_MATRIX){
3957     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3958     ci[0] = 0;
3959     for (i=0; i<am; i++){
3960       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3961     }
3962     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3963     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3964     k = 0;
3965     for (i=0; i<am; i++) {
3966       ncols_o = bi[i+1] - bi[i];
3967       ncols_d = ai[i+1] - ai[i];
3968       /* off-diagonal portion of A */
3969       for (jo=0; jo<ncols_o; jo++) {
3970         col = cmap[*bj];
3971         if (col >= cstart) break;
3972         cj[k]   = col; bj++;
3973         ca[k++] = *ba++;
3974       }
3975       /* diagonal portion of A */
3976       for (j=0; j<ncols_d; j++) {
3977         cj[k]   = cstart + *aj++;
3978         ca[k++] = *aa++;
3979       }
3980       /* off-diagonal portion of A */
3981       for (j=jo; j<ncols_o; j++) {
3982         cj[k]   = cmap[*bj++];
3983         ca[k++] = *ba++;
3984       }
3985     }
3986     /* put together the new matrix */
3987     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3988     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3989     /* Since these are PETSc arrays, change flags to free them as necessary. */
3990     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3991     mat->free_a  = PETSC_TRUE;
3992     mat->free_ij = PETSC_TRUE;
3993     mat->nonew   = 0;
3994   } else if (scall == MAT_REUSE_MATRIX){
3995     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3996     ci = mat->i; cj = mat->j; ca = mat->a;
3997     for (i=0; i<am; i++) {
3998       /* off-diagonal portion of A */
3999       ncols_o = bi[i+1] - bi[i];
4000       for (jo=0; jo<ncols_o; jo++) {
4001         col = cmap[*bj];
4002         if (col >= cstart) break;
4003         *ca++ = *ba++; bj++;
4004       }
4005       /* diagonal portion of A */
4006       ncols_d = ai[i+1] - ai[i];
4007       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4008       /* off-diagonal portion of A */
4009       for (j=jo; j<ncols_o; j++) {
4010         *ca++ = *ba++; bj++;
4011       }
4012     }
4013   } else {
4014     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4015   }
4016 
4017   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4018   PetscFunctionReturn(0);
4019 }
4020 
4021 static PetscEvent logkey_getlocalmatcondensed = 0;
4022 #undef __FUNCT__
4023 #define __FUNCT__ "MatGetLocalMatCondensed"
4024 /*@C
4025      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4026 
4027     Not Collective
4028 
4029    Input Parameters:
4030 +    A - the matrix
4031 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4032 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4033 
4034    Output Parameter:
4035 .    A_loc - the local sequential matrix generated
4036 
4037     Level: developer
4038 
4039 @*/
4040 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4041 {
4042   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4043   PetscErrorCode    ierr;
4044   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4045   IS                isrowa,iscola;
4046   Mat               *aloc;
4047 
4048   PetscFunctionBegin;
4049   if (!logkey_getlocalmatcondensed) {
4050     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
4051   }
4052   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4053   if (!row){
4054     start = A->rmap.rstart; end = A->rmap.rend;
4055     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4056   } else {
4057     isrowa = *row;
4058   }
4059   if (!col){
4060     start = A->cmap.rstart;
4061     cmap  = a->garray;
4062     nzA   = a->A->cmap.n;
4063     nzB   = a->B->cmap.n;
4064     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4065     ncols = 0;
4066     for (i=0; i<nzB; i++) {
4067       if (cmap[i] < start) idx[ncols++] = cmap[i];
4068       else break;
4069     }
4070     imark = i;
4071     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4072     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4073     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4074     ierr = PetscFree(idx);CHKERRQ(ierr);
4075   } else {
4076     iscola = *col;
4077   }
4078   if (scall != MAT_INITIAL_MATRIX){
4079     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4080     aloc[0] = *A_loc;
4081   }
4082   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4083   *A_loc = aloc[0];
4084   ierr = PetscFree(aloc);CHKERRQ(ierr);
4085   if (!row){
4086     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4087   }
4088   if (!col){
4089     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4090   }
4091   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4092   PetscFunctionReturn(0);
4093 }
4094 
4095 static PetscEvent logkey_GetBrowsOfAcols = 0;
4096 #undef __FUNCT__
4097 #define __FUNCT__ "MatGetBrowsOfAcols"
4098 /*@C
4099     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4100 
4101     Collective on Mat
4102 
4103    Input Parameters:
4104 +    A,B - the matrices in mpiaij format
4105 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4106 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4107 
4108    Output Parameter:
4109 +    rowb, colb - index sets of rows and columns of B to extract
4110 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
4111 -    B_seq - the sequential matrix generated
4112 
4113     Level: developer
4114 
4115 @*/
4116 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4117 {
4118   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4119   PetscErrorCode    ierr;
4120   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4121   IS                isrowb,iscolb;
4122   Mat               *bseq;
4123 
4124   PetscFunctionBegin;
4125   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4126     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4127   }
4128   if (!logkey_GetBrowsOfAcols) {
4129     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
4130   }
4131   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4132 
4133   if (scall == MAT_INITIAL_MATRIX){
4134     start = A->cmap.rstart;
4135     cmap  = a->garray;
4136     nzA   = a->A->cmap.n;
4137     nzB   = a->B->cmap.n;
4138     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4139     ncols = 0;
4140     for (i=0; i<nzB; i++) {  /* row < local row index */
4141       if (cmap[i] < start) idx[ncols++] = cmap[i];
4142       else break;
4143     }
4144     imark = i;
4145     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4146     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4147     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4148     ierr = PetscFree(idx);CHKERRQ(ierr);
4149     *brstart = imark;
4150     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4151   } else {
4152     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4153     isrowb = *rowb; iscolb = *colb;
4154     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4155     bseq[0] = *B_seq;
4156   }
4157   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4158   *B_seq = bseq[0];
4159   ierr = PetscFree(bseq);CHKERRQ(ierr);
4160   if (!rowb){
4161     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4162   } else {
4163     *rowb = isrowb;
4164   }
4165   if (!colb){
4166     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4167   } else {
4168     *colb = iscolb;
4169   }
4170   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4171   PetscFunctionReturn(0);
4172 }
4173 
4174 static PetscEvent logkey_GetBrowsOfAocols = 0;
4175 #undef __FUNCT__
4176 #define __FUNCT__ "MatGetBrowsOfAoCols"
4177 /*@C
4178     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4179     of the OFF-DIAGONAL portion of local A
4180 
4181     Collective on Mat
4182 
4183    Input Parameters:
4184 +    A,B - the matrices in mpiaij format
4185 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4186 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4187 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4188 
4189    Output Parameter:
4190 +    B_oth - the sequential matrix generated
4191 
4192     Level: developer
4193 
4194 @*/
4195 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4196 {
4197   VecScatter_MPI_General *gen_to,*gen_from;
4198   PetscErrorCode         ierr;
4199   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4200   Mat_SeqAIJ             *b_oth;
4201   VecScatter             ctx=a->Mvctx;
4202   MPI_Comm               comm=ctx->comm;
4203   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4204   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4205   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4206   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4207   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4208   MPI_Status             *sstatus,rstatus;
4209   PetscInt               *cols,sbs,rbs;
4210   PetscScalar            *vals;
4211 
4212   PetscFunctionBegin;
4213   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4214     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4215   }
4216   if (!logkey_GetBrowsOfAocols) {
4217     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
4218   }
4219   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4220   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4221 
4222   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4223   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4224   rvalues  = gen_from->values; /* holds the length of receiving row */
4225   svalues  = gen_to->values;   /* holds the length of sending row */
4226   nrecvs   = gen_from->n;
4227   nsends   = gen_to->n;
4228 
4229   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4230   srow     = gen_to->indices;   /* local row index to be sent */
4231   sstarts  = gen_to->starts;
4232   sprocs   = gen_to->procs;
4233   sstatus  = gen_to->sstatus;
4234   sbs      = gen_to->bs;
4235   rstarts  = gen_from->starts;
4236   rprocs   = gen_from->procs;
4237   rbs      = gen_from->bs;
4238 
4239   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4240   if (scall == MAT_INITIAL_MATRIX){
4241     /* i-array */
4242     /*---------*/
4243     /*  post receives */
4244     for (i=0; i<nrecvs; i++){
4245       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4246       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4247       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4248     }
4249 
4250     /* pack the outgoing message */
4251     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4252     rstartsj = sstartsj + nsends +1;
4253     sstartsj[0] = 0;  rstartsj[0] = 0;
4254     len = 0; /* total length of j or a array to be sent */
4255     k = 0;
4256     for (i=0; i<nsends; i++){
4257       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4258       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4259       for (j=0; j<nrows; j++) {
4260         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4261         for (l=0; l<sbs; l++){
4262           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4263           rowlen[j*sbs+l] = ncols;
4264           len += ncols;
4265           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4266         }
4267         k++;
4268       }
4269       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4270       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4271     }
4272     /* recvs and sends of i-array are completed */
4273     i = nrecvs;
4274     while (i--) {
4275       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4276     }
4277     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4278 
4279     /* allocate buffers for sending j and a arrays */
4280     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4281     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4282 
4283     /* create i-array of B_oth */
4284     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4285     b_othi[0] = 0;
4286     len = 0; /* total length of j or a array to be received */
4287     k = 0;
4288     for (i=0; i<nrecvs; i++){
4289       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4290       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4291       for (j=0; j<nrows; j++) {
4292         b_othi[k+1] = b_othi[k] + rowlen[j];
4293         len += rowlen[j]; k++;
4294       }
4295       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4296     }
4297 
4298     /* allocate space for j and a arrrays of B_oth */
4299     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4300     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4301 
4302     /* j-array */
4303     /*---------*/
4304     /*  post receives of j-array */
4305     for (i=0; i<nrecvs; i++){
4306       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4307       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4308     }
4309 
4310     /* pack the outgoing message j-array */
4311     k = 0;
4312     for (i=0; i<nsends; i++){
4313       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4314       bufJ = bufj+sstartsj[i];
4315       for (j=0; j<nrows; j++) {
4316         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4317         for (ll=0; ll<sbs; ll++){
4318           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4319           for (l=0; l<ncols; l++){
4320             *bufJ++ = cols[l];
4321           }
4322           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4323         }
4324       }
4325       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4326     }
4327 
4328     /* recvs and sends of j-array are completed */
4329     i = nrecvs;
4330     while (i--) {
4331       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4332     }
4333     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4334   } else if (scall == MAT_REUSE_MATRIX){
4335     sstartsj = *startsj;
4336     rstartsj = sstartsj + nsends +1;
4337     bufa     = *bufa_ptr;
4338     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4339     b_otha   = b_oth->a;
4340   } else {
4341     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4342   }
4343 
4344   /* a-array */
4345   /*---------*/
4346   /*  post receives of a-array */
4347   for (i=0; i<nrecvs; i++){
4348     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4349     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4350   }
4351 
4352   /* pack the outgoing message a-array */
4353   k = 0;
4354   for (i=0; i<nsends; i++){
4355     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4356     bufA = bufa+sstartsj[i];
4357     for (j=0; j<nrows; j++) {
4358       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4359       for (ll=0; ll<sbs; ll++){
4360         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4361         for (l=0; l<ncols; l++){
4362           *bufA++ = vals[l];
4363         }
4364         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4365       }
4366     }
4367     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4368   }
4369   /* recvs and sends of a-array are completed */
4370   i = nrecvs;
4371   while (i--) {
4372     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4373   }
4374   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4375   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4376 
4377   if (scall == MAT_INITIAL_MATRIX){
4378     /* put together the new matrix */
4379     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4380 
4381     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4382     /* Since these are PETSc arrays, change flags to free them as necessary. */
4383     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4384     b_oth->free_a  = PETSC_TRUE;
4385     b_oth->free_ij = PETSC_TRUE;
4386     b_oth->nonew   = 0;
4387 
4388     ierr = PetscFree(bufj);CHKERRQ(ierr);
4389     if (!startsj || !bufa_ptr){
4390       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4391       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4392     } else {
4393       *startsj  = sstartsj;
4394       *bufa_ptr = bufa;
4395     }
4396   }
4397   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4398   PetscFunctionReturn(0);
4399 }
4400 
4401 #undef __FUNCT__
4402 #define __FUNCT__ "MatGetCommunicationStructs"
4403 /*@C
4404   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4405 
4406   Not Collective
4407 
4408   Input Parameters:
4409 . A - The matrix in mpiaij format
4410 
4411   Output Parameter:
4412 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4413 . colmap - A map from global column index to local index into lvec
4414 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4415 
4416   Level: developer
4417 
4418 @*/
4419 #if defined (PETSC_USE_CTABLE)
4420 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4421 #else
4422 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4423 #endif
4424 {
4425   Mat_MPIAIJ *a;
4426 
4427   PetscFunctionBegin;
4428   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4429   PetscValidPointer(lvec, 2)
4430   PetscValidPointer(colmap, 3)
4431   PetscValidPointer(multScatter, 4)
4432   a = (Mat_MPIAIJ *) A->data;
4433   if (lvec) *lvec = a->lvec;
4434   if (colmap) *colmap = a->colmap;
4435   if (multScatter) *multScatter = a->Mvctx;
4436   PetscFunctionReturn(0);
4437 }
4438 
4439 EXTERN_C_BEGIN
4440 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4441 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4442 EXTERN_C_END
4443 
4444 /*MC
4445    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4446 
4447    Options Database Keys:
4448 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4449 
4450   Level: beginner
4451 
4452 .seealso: MatCreateMPIAIJ
4453 M*/
4454 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
/*
   MatCreate_MPIAIJ - Constructor for the MATMPIAIJ matrix type: allocates the
   Mat_MPIAIJ data structure, installs the MPIAIJ function table, initializes
   the per-process fields, and composes the type-specific routines that
   interface functions look up by name.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  /* allocate the type-specific data and point B's operation table at the MPIAIJ implementations */
  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->rmap.bs      = 1;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode      = NOT_SET_VALUES;
  b->size            = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;  /* global-to-local column map; built lazily (see CreateColmap_MPIAIJ_Private) */
  b->garray      = 0;  /* local-to-global column map of the off-diagonal block; built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec      = PETSC_NULL;
  b->Mvctx     = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;


  /* compose the type-specific implementations so generic interface routines
     can find them via PetscObjectQueryFunction() on this object */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
				     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
				     "MatIsTranspose_MPIAIJ",
				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
				     "MatMPIAIJSetPreallocation_MPIAIJ",
				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
				     "MatDiagonalScaleLocal_MPIAIJ",
				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
                                     "MatConvert_MPIAIJ_MPICSRPERM",
                                      MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
                                     "MatConvert_MPIAIJ_MPICRL",
                                      MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
4527 
4528 #undef __FUNCT__
4529 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4530 /*@C
4531      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4532          and "off-diagonal" part of the matrix in CSR format.
4533 
4534    Collective on MPI_Comm
4535 
4536    Input Parameters:
4537 +  comm - MPI communicator
4538 .  m - number of local rows (Cannot be PETSC_DECIDE)
4539 .  n - This value should be the same as the local size used in creating the
4540        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4541        calculated if N is given) For square matrices n is almost always m.
4542 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4543 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4544 .   i - row indices for "diagonal" portion of matrix
4545 .   j - column indices
4546 .   a - matrix values
4547 .   oi - row indices for "off-diagonal" portion of matrix
4548 .   oj - column indices
4549 -   oa - matrix values
4550 
4551    Output Parameter:
4552 .   mat - the matrix
4553 
4554    Level: advanced
4555 
4556    Notes:
4557        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4558 
4559        The i and j indices are 0 based
4560 
4561        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4562 
4563 
4564 .keywords: matrix, aij, compressed row, sparse, parallel
4565 
4566 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4567           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4568 @*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

 PetscFunctionBegin;
  /* The local row count is needed to build both sequential pieces below, so
     unlike n/M/N it can be neither PETSC_DECIDE nor negative */
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  /* i[] and oi[] are CSR row-offset arrays; a valid CSR encoding starts at 0 */
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (oi[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  }
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;
  /* All entries are supplied locally, so no off-process stashing is needed */
  maij->donotstash     = PETSC_TRUE;
  /* Storage comes straight from the user arrays; skip the preallocation path */
  (*mat)->preallocated = PETSC_TRUE;

  /* Finalize the row/column layouts now so (*mat)->cmap.N is valid below */
  (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
  ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);

  /* A wraps the "diagonal" block; B wraps the "off-diagonal" entries, created
     here with the full global column width.  NOTE(review): presumably compacted
     to the usual compressed off-diagonal form during assembly -- confirm in
     MatAssemblyEnd_MPIAIJ */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4606 
4607 /*
4608     Special version for direct calls from Fortran
4609 */
/* Fortran name mangling: map the Fortran-visible symbol onto the C symbol,
   depending on how the Fortran compiler decorates external names */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so can be used in void function */
/* The Fortran entry point below returns void, so the usual error-return macros
   (which do "return ierr") cannot be used; redefine them to abort on the
   matrix's communicator instead.  Note these definitions discard the error
   message arguments of SETERRQ/SETERRQ2. */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4623 
4624 EXTERN_C_BEGIN
4625 #undef __FUNCT__
4626 #define __FUNCT__ "matsetvaluesmpiaij_"
4627 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4628 {
4629   Mat            mat = *mmat;
4630   PetscInt       m = *mm, n = *mn;
4631   InsertMode     addv = *maddv;
4632   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4633   PetscScalar    value;
4634   PetscErrorCode ierr;
4635 
4636   MatPreallocated(mat);
4637   if (mat->insertmode == NOT_SET_VALUES) {
4638     mat->insertmode = addv;
4639   }
4640 #if defined(PETSC_USE_DEBUG)
4641   else if (mat->insertmode != addv) {
4642     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4643   }
4644 #endif
4645   {
4646   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4647   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4648   PetscTruth     roworiented = aij->roworiented;
4649 
4650   /* Some Variables required in the macro */
4651   Mat            A = aij->A;
4652   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4653   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4654   PetscScalar    *aa = a->a;
4655   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4656   Mat            B = aij->B;
4657   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4658   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4659   PetscScalar    *ba = b->a;
4660 
4661   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4662   PetscInt       nonew = a->nonew;
4663   PetscScalar    *ap1,*ap2;
4664 
4665   PetscFunctionBegin;
4666   for (i=0; i<m; i++) {
4667     if (im[i] < 0) continue;
4668 #if defined(PETSC_USE_DEBUG)
4669     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4670 #endif
4671     if (im[i] >= rstart && im[i] < rend) {
4672       row      = im[i] - rstart;
4673       lastcol1 = -1;
4674       rp1      = aj + ai[row];
4675       ap1      = aa + ai[row];
4676       rmax1    = aimax[row];
4677       nrow1    = ailen[row];
4678       low1     = 0;
4679       high1    = nrow1;
4680       lastcol2 = -1;
4681       rp2      = bj + bi[row];
4682       ap2      = ba + bi[row];
4683       rmax2    = bimax[row];
4684       nrow2    = bilen[row];
4685       low2     = 0;
4686       high2    = nrow2;
4687 
4688       for (j=0; j<n; j++) {
4689         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4690         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4691         if (in[j] >= cstart && in[j] < cend){
4692           col = in[j] - cstart;
4693           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4694         } else if (in[j] < 0) continue;
4695 #if defined(PETSC_USE_DEBUG)
4696         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4697 #endif
4698         else {
4699           if (mat->was_assembled) {
4700             if (!aij->colmap) {
4701               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4702             }
4703 #if defined (PETSC_USE_CTABLE)
4704             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4705 	    col--;
4706 #else
4707             col = aij->colmap[in[j]] - 1;
4708 #endif
4709             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4710               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4711               col =  in[j];
4712               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4713               B = aij->B;
4714               b = (Mat_SeqAIJ*)B->data;
4715               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4716               rp2      = bj + bi[row];
4717               ap2      = ba + bi[row];
4718               rmax2    = bimax[row];
4719               nrow2    = bilen[row];
4720               low2     = 0;
4721               high2    = nrow2;
4722               bm       = aij->B->rmap.n;
4723               ba = b->a;
4724             }
4725           } else col = in[j];
4726           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4727         }
4728       }
4729     } else {
4730       if (!aij->donotstash) {
4731         if (roworiented) {
4732           if (ignorezeroentries && v[i*n] == 0.0) continue;
4733           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4734         } else {
4735           if (ignorezeroentries && v[i] == 0.0) continue;
4736           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4737         }
4738       }
4739     }
4740   }}
4741   PetscFunctionReturnVoid();
4742 }
4743 EXTERN_C_END
4744 
4745