xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 4bdbc9fa98817dabc77d8a43517f7595b330673f)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array but is fast to access).
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
/*
  Builds aij->colmap, mapping a global column number to (local column index
  in the off-diagonal block B) + 1; an entry of 0 means the column is not
  stored in B.  With PETSC_USE_CTABLE the map is a hash table (memory
  scalable, slightly slower lookups); otherwise it is a dense array of
  length mat->cmap.N (fast but each process holds order-N storage).
*/
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;   /* number of columns stored in the off-diagonal block */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    /* key and value are stored shifted by +1 because PetscTable uses 0 for "absent" */
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  /* values are local index + 1 so that 0 can mean "column not present in B" */
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
35 
36 
#define CHUNKSIZE   15
/*
  Inserts/adds one (row,col,value) into the "diagonal" block A.  Expects the
  caller to provide, as ambient variables, the per-row search state rp1
  (column indices), ap1 (values), nrow1, rmax1, low1, high1, lastcol1 and
  the aliases A, a, am, aa, ai, aj, aimax, ailen, nonew, ignorezeroentries,
  plus scratch variables t, _i, ii, N.  Strategy: narrow the search window
  using the previously inserted column (columns are often fed in increasing
  order), bisect down to <= 5 candidates, then scan linearly.  New entries
  reallocate the row via MatSeqXAIJReallocateAIJ() when it is full.
  Note: a zero value for a NEW entry is dropped whenever ignorezeroentries
  is set, regardless of addv; an existing entry is still updated above.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
      if (nonew == 1) goto a_noinsert; \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
70 
71 
/*
  Identical insertion logic to MatSetValues_SeqAIJ_A_Private() but for the
  off-diagonal block B, using the rp2/ap2/nrow2/rmax2/low2/high2/lastcol2
  search state and the B, b, bm, ba, bi, bj, bimax, bilen aliases.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
       for (_i=low2; _i<high2; _i++) { \
        if (rp2[_i] > col) break; \
        if (rp2[_i] == col) { \
          if (addv == ADD_VALUES) ap2[_i] += value;   \
          else                    ap2[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
      if (nonew == 1) goto b_noinsert; \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
      N = nrow2++ - 1; b->nz++; high2++;\
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp2[ii+1] = rp2[ii]; \
        ap2[ii+1] = ap2[ii]; \
      } \
      rp2[_i] = col;  \
      ap2[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow2; \
}
104 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
  Overwrites every stored value of one locally-owned row.  v[] must hold one
  value per stored nonzero of the row, ordered by increasing GLOBAL column;
  the values are copied, in order, into: the part of B left of the diagonal
  block, then all of A, then the part of B right of the diagonal block.
  As noted below, this layout is only valid for square matrices (the row
  ownership start `diag` is used as the column split point).
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;                     /* convert global row number to local */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    /* garray translates B's local column numbers back to global; stop at the
       first B column that lies to the right of the diagonal block */
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = a->ignorezeroentries;
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
  Retrieves an m-by-n block of values into v (row-major).  Only locally
  owned rows may be requested; off-process rows trigger an error.
  Diagonal-block columns are read from aij->A; other columns are translated
  through the colmap and read from aij->B, with columns not stored in B
  returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;            /* local row number */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column falls in the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate global -> local B column */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;                        /* colmap stores local index + 1; 0 means "absent" */
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0 (or a stale colmap slot) means the entry is not stored: return 0 */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are either in INSERTMODE or ADDMODE */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
  Completes parallel assembly: drains the stash (re-inserting received
  off-process values), assembles the diagonal block A, coordinates matrix
  disassembly across all processes so they stay consistent, builds the
  ghost-column scatter on the first final assembly, and assembles the
  off-diagonal block B with compressed-row storage enabled.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive stashed entries message-by-message until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  /* the diagonal block keeps ordinary (uncompressed) row storage */
  a->compressedrow.use     = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: the product is nonzero only if EVERY process is still assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* first final assembly: build garray, lvec and the Mvctx scatter */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* drop the row-value workspace cached by MatGetRow() */
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */

  PetscFunctionReturn(0);
}
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
  Zeros the given GLOBAL rows (any process may name any row) and optionally
  puts `diag` on the diagonal.  Phases:
    1. each process counts how many of its requested rows belong to every
       other process and exchanges those rows point-to-point;
    2. the locally owned rows (own requests + received) are collected in lrows;
    3. the rows are zeroed in both sequential blocks, with special handling
       so that setting the diagonal does not allocate when the diagonal
       block is square.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* NOTE(review): the guards here test PETSC_DEBUG while the rest of the file
     tests PETSC_USE_DEBUG -- confirm which macro is intended, or this range
     check is never compiled. */
  /*  first count number of contributors to each processor */
  /* nprocs[2*j] = #rows destined for process j; nprocs[2*j+1] = 1 if any */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* restart the owner scan only when the row indices go backwards */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* pack all requested rows, grouped by destination process */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* the packing loop advanced starts[]; rebuild it before sending */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];                 /* first globally-owned row; used to localize indices */

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;   /* convert to local row numbers */
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    /* square diagonal block: the sequential MatZeroRows can set the diagonal in place */
    ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: zero first, then insert diagonal entries one by one */
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;          /* global row (and column) of the diagonal entry */
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
  yy = A^T * xx.  The off-diagonal contribution B^T*xx goes into the ghost
  vector lvec and is scattered back onto yy (SCATTER_REVERSE, ADD_VALUES);
  the local contribution A^T*xx is computed into yy.  The relative order of
  the scatter and the local product differs depending on whether the scatter
  context performs its communication in a merged Begin() call.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
605 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
  Tests whether Bmat == Amat^T (to tolerance tol), setting *f accordingly.
  Cheap test first: the two diagonal blocks must be transposes of each
  other; only if that passes (and there is more than one process) are the
  off-diagonal parts extracted with MatGetSubMatrices() and compared.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  /* notme = all global rows NOT owned by this process */
  /* NOTE(review): notme is allocated for N-last+first entries but the second
     loop below fills up to index M-last+first-1; these agree only when
     M == N -- confirm the square-matrix assumption is enforced by callers. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
  Destroys the parallel matrix: frees the stash, the two sequential blocks,
  the column map, the ghost-column array, communication objects and cached
  work arrays, then clears the type name and the composed function slots so
  stale function pointers cannot be invoked afterwards.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  /* unregister the type name and composed methods installed at creation */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* )th processor needs space a largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
   Viewer back-end for ASCII, draw, binary and socket viewers.

   The ASCII INFO/INFO_DETAIL/FACTOR_INFO formats print summary statistics and
   return early.  Binary output is delegated to MatView_MPIAIJ_Binary() (or to
   the sequential viewer when running on one process).  All remaining cases
   (full ASCII dump, draw, socket) assemble the entire matrix onto process 0
   in a temporary MPIAIJ matrix and view that through a singleton viewer.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* NOTE(review): a PetscTruth* is passed where the API takes PetscInt**; only
         its NULL/non-NULL status is used below, but confirm the cast is intended */
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      /* per-block nonzero counts for the diagonal (A) and off-diagonal (B) parts */
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt   inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factored-matrix info */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* uniprocessor case: the diagonal block is the whole matrix */
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    /* process 0 holds all rows of the temporary matrix; everyone else holds none */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    /* temporarily shift the diagonal block's column indices to global numbering... */
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    /* ...then restore local numbering so 'mat' is unchanged */
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;   /* keep the base pointer; 'cols' is advanced below */
    /* translate the off-diagonal block's compact column indices to global ones */
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatView_MPIAIJ"
996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997 {
998   PetscErrorCode ierr;
999   PetscTruth     iascii,isdraw,issocket,isbinary;
1000 
1001   PetscFunctionBegin;
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1003   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1005   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1006   if (iascii || isdraw || isbinary || issocket) {
1007     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1008   } else {
1009     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017 {
1018   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec            bb1;
1021 
1022   PetscFunctionBegin;
1023   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1024 
1025   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1026 
1027   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1030       its--;
1031     }
1032 
1033     while (its--) {
1034       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036 
1037       /* update rhs: bb1 = bb - B*x */
1038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1040 
1041       /* local sweep */
1042       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043       CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060       CHKERRQ(ierr);
1061     }
1062   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063     if (flag & SOR_ZERO_INITIAL_GUESS) {
1064       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1065       its--;
1066     }
1067     while (its--) {
1068       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else {
1080     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081   }
1082 
1083   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
/*
   Permutes rows and columns of a parallel AIJ matrix: B = P_r * A * P_c.

   Strategy: build a collective (matrix-communicator) version of the row
   permutation, gather it globally, invert it, and take the slice of the
   inverse covering this process's rows as a local IS.  The column
   permutation only needs a local inverted copy.  The permuted matrix is then
   extracted with MatGetSubMatrix().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
  if (pcomm==comm) {
    /* already on the matrix communicator; use it directly (no copy) */
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
  ierr = ISSetPermutation(growp); CHKERRQ(ierr);
  if (pcomm!=comm) {
    /* crowp was our copy only in the communicator-mismatch case */
    ierr = ISDestroy(crowp); CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices: the slice of the inverse permutation that
     covers this process's row range [first, first+local_size) */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
  ierr = ISDestroy(irowp); CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
  if (ntids==1) {
    /* colp is already sequential; use it in place */
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
  if (ntids>1) {
    /* release the indices held since the parallel-colp branch above */
    ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
    ierr = ISDestroy(lcolp); CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp); CHKERRQ(ierr);
  ierr = ISDestroy(icolp); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153   Mat            A = mat->A,B = mat->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = 1.0;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   }
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   info->rows_global       = (double)matin->rmap.N;
1190   info->columns_global    = (double)matin->cmap.N;
1191   info->rows_local        = (double)matin->rmap.n;
1192   info->columns_local     = (double)matin->cmap.N;
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatSetOption_MPIAIJ"
1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   switch (op) {
1206   case MAT_NO_NEW_NONZERO_LOCATIONS:
1207   case MAT_YES_NEW_NONZERO_LOCATIONS:
1208   case MAT_COLUMNS_UNSORTED:
1209   case MAT_COLUMNS_SORTED:
1210   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1211   case MAT_KEEP_ZEROED_ROWS:
1212   case MAT_NEW_NONZERO_LOCATION_ERR:
1213   case MAT_USE_INODES:
1214   case MAT_DO_NOT_USE_INODES:
1215   case MAT_IGNORE_ZERO_ENTRIES:
1216     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1217     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1218     break;
1219   case MAT_ROW_ORIENTED:
1220     a->roworiented = PETSC_TRUE;
1221     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1222     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1223     break;
1224   case MAT_ROWS_SORTED:
1225   case MAT_ROWS_UNSORTED:
1226   case MAT_YES_NEW_DIAGONALS:
1227     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1228     break;
1229   case MAT_COLUMN_ORIENTED:
1230     a->roworiented = PETSC_FALSE;
1231     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1232     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1233     break;
1234   case MAT_IGNORE_OFF_PROC_ENTRIES:
1235     a->donotstash = PETSC_TRUE;
1236     break;
1237   case MAT_NO_NEW_DIAGONALS:
1238     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1239   case MAT_SYMMETRIC:
1240     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1241     break;
1242   case MAT_STRUCTURALLY_SYMMETRIC:
1243   case MAT_HERMITIAN:
1244   case MAT_SYMMETRY_ETERNAL:
1245     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1246     break;
1247   case MAT_NOT_SYMMETRIC:
1248   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1249   case MAT_NOT_HERMITIAN:
1250   case MAT_NOT_SYMMETRY_ETERNAL:
1251     break;
1252   default:
1253     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   Returns one locally owned row with globally numbered, increasing column
   indices, merging the diagonal (A) and off-diagonal (B) blocks' rows.
   Output arrays point into a per-matrix scratch buffer, so only one row can
   be "gotten" at a time; MatRestoreRow_MPIAIJ() must be called before the
   next MatGetRow().  idx and/or v may be PETSC_NULL if not wanted.
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt     max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation carries both the values and (after them) the indices */
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal
         block; those go first, then all of A, then the rest of B */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found during the values pass */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1344 
1345   PetscFunctionBegin;
1346   if (!aij->getrowactive) {
1347     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1348   }
1349   aij->getrowactive = PETSC_FALSE;
1350   PetscFunctionReturn(0);
1351 }
1352 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   Computes a matrix norm by combining the local diagonal (A) and
   off-diagonal (B) blocks and reducing over the communicator:
     NORM_FROBENIUS - sum of squared entries, MPI_SUM, then sqrt;
     NORM_1         - per-global-column absolute sums, MPI_SUM, then max;
     NORM_INFINITY  - per-local-row absolute sums, then MPI_MAX.
   NORM_2 is not supported.  On one process the work is delegated to the
   sequential block.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap.rstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* accumulate |a_ij|^2 over both local blocks */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt    *jj,*garray = aij->garray;
      /* tmp holds per-global-column absolute sums; O(N) storage per process */
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      /* diagonal block: local column + cstart gives the global column */
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      /* off-diagonal block: garray maps compact to global column */
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap.n; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1429 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   Forms the transpose of a parallel AIJ matrix by inserting each local row
   of A as a column of the new matrix B via MatSetValues().  With matout the
   result is returned in *matout; with matout == PETSC_NULL the transpose
   replaces A in place (square matrices only).
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
  PetscInt       cstart=A->cmap.rstart,ncol;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
  ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  /* single allocation: d_nnz counts followed by bi[mb] ints of workspace */
  ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  cols = d_nnz + na + 1; /* work space to be used by B part */
  ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<ai[ma]; i++){
    d_nnz[aj[i]] ++;
    aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  /* transposed sizes: local/global rows and columns swapped */
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);

  /* copy over the A part */
  array = Aloc->a;
  row = A->rmap.rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    /* one row of A inserted as one column of B (note swapped row/col args) */
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  array = Bloc->a;
  row = A->rmap.rstart;
  /* translate the off-diagonal block's compact columns to global numbering */
  for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ncol; cols += ncol;
  }
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: move B's guts into A and discard the shell */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1492 
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1495 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1496 {
1497   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1498   Mat            a = aij->A,b = aij->B;
1499   PetscErrorCode ierr;
1500   PetscInt       s1,s2,s3;
1501 
1502   PetscFunctionBegin;
1503   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1504   if (rr) {
1505     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1506     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1507     /* Overlap communication with computation. */
1508     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1509   }
1510   if (ll) {
1511     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1512     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1513     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1514   }
1515   /* scale  the diagonal block */
1516   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1517 
1518   if (rr) {
1519     /* Do a scatter end and then right scale the off-diagonal block */
1520     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1521     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1522   }
1523 
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1529 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1530 {
1531   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1532   PetscErrorCode ierr;
1533 
1534   PetscFunctionBegin;
1535   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1536   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1541 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1542 {
1543   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1544   PetscErrorCode ierr;
1545 
1546   PetscFunctionBegin;
1547   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatEqual_MPIAIJ"
1553 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1554 {
1555   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1556   Mat            a,b,c,d;
1557   PetscTruth     flg;
1558   PetscErrorCode ierr;
1559 
1560   PetscFunctionBegin;
1561   a = matA->A; b = matA->B;
1562   c = matB->A; d = matB->B;
1563 
1564   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1565   if (flg) {
1566     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1567   }
1568   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1569   PetscFunctionReturn(0);
1570 }
1571 
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatCopy_MPIAIJ"
1574 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1575 {
1576   PetscErrorCode ierr;
1577   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1578   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1579 
1580   PetscFunctionBegin;
1581   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1582   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1583     /* because of the column compression in the off-processor part of the matrix a->B,
1584        the number of columns in a->B and b->B may be different, hence we cannot call
1585        the MatCopy() directly on the two parts. If need be, we can provide a more
1586        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1587        then copying the submatrices */
1588     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1589   } else {
1590     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1591     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1592   }
1593   PetscFunctionReturn(0);
1594 }
1595 
1596 #undef __FUNCT__
1597 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1598 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1599 {
1600   PetscErrorCode ierr;
1601 
1602   PetscFunctionBegin;
1603   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 #include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   Computes Y = a*X + Y.

   SAME_NONZERO_PATTERN:   the nonzero arrays line up entry-for-entry, so a
                           single BLAS axpy per block suffices.
   SUBSET_NONZERO_PATTERN: X's pattern is contained in Y's; a cached map
                           y->xtoy from X's off-diagonal nonzeros into Y's
                           value array is built once and reused on later calls.
   otherwise:              fall back to the generic MatAXPY_Basic().
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks: y->a += alpha * x->a over all nz entries */
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    /* off-diagonal blocks, same treatment */
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    /* the sequential kernel handles the diagonal block directly */
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* drop a cached map that was built against a different X */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      /* keep X's off-diagonal block alive while the cached map references it */
      ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1649 
1650 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatConjugate_MPIAIJ"
1654 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1655 {
1656 #if defined(PETSC_USE_COMPLEX)
1657   PetscErrorCode ierr;
1658   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1659 
1660   PetscFunctionBegin;
1661   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1662   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1663 #else
1664   PetscFunctionBegin;
1665 #endif
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 #undef __FUNCT__
1670 #define __FUNCT__ "MatRealPart_MPIAIJ"
1671 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1672 {
1673   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1674   PetscErrorCode ierr;
1675 
1676   PetscFunctionBegin;
1677   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1678   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1684 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1685 {
1686   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1687   PetscErrorCode ierr;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1691   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #ifdef PETSC_HAVE_PBGL
1696 
1697 #include <boost/parallel/mpi/bsp_process_group.hpp>
1698 #include <boost/graph/distributed/ilu_default_graph.hpp>
1699 #include <boost/graph/distributed/ilu_0_block.hpp>
1700 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1701 #include <boost/graph/distributed/petsc/interface.hpp>
1702 #include <boost/multi_array.hpp>
1703 #include <boost/parallel/distributed_property_map.hpp>
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1707 /*
1708   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1709 */
1710 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1711 {
1712   namespace petsc = boost::distributed::petsc;
1713 
1714   namespace graph_dist = boost::graph::distributed;
1715   using boost::graph::distributed::ilu_default::process_group_type;
1716   using boost::graph::ilu_permuted;
1717 
1718   PetscTruth      row_identity, col_identity;
1719   PetscContainer  c;
1720   PetscInt        m, n, M, N;
1721   PetscErrorCode  ierr;
1722 
1723   PetscFunctionBegin;
1724   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1725   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1726   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1727   if (!row_identity || !col_identity) {
1728     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1729   }
1730 
1731   process_group_type pg;
1732   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1733   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1734   lgraph_type&   level_graph = *lgraph_p;
1735   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1736 
1737   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1738   ilu_permuted(level_graph);
1739 
1740   /* put together the new matrix */
1741   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1742   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1743   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1744   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1745   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1746   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1747   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1748   (*fact)->factor = FACTOR_LU;
1749 
1750   ierr = PetscContainerCreate(A->comm, &c);
1751   ierr = PetscContainerSetPointer(c, lgraph_p);
1752   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1753   PetscFunctionReturn(0);
1754 }
1755 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
/*
   Intentionally a no-op: on the PBGL path the whole factorization is done in
   MatILUFactorSymbolic_MPIAIJ(), so there is no separate numeric phase.
*/
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
1763 
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatSolve_MPIAIJ"
1766 /*
1767   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1768 */
1769 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1770 {
1771   namespace graph_dist = boost::graph::distributed;
1772 
1773   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1774   lgraph_type*   lgraph_p;
1775   PetscContainer c;
1776   PetscErrorCode ierr;
1777 
1778   PetscFunctionBegin;
1779   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1780   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1781   ierr = VecCopy(b, x); CHKERRQ(ierr);
1782 
1783   PetscScalar* array_x;
1784   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1785   PetscInt sx;
1786   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1787 
1788   PetscScalar* array_b;
1789   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1790   PetscInt sb;
1791   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1792 
1793   lgraph_type&   level_graph = *lgraph_p;
1794   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1795 
1796   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1797   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1798                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1799 
1800   typedef boost::iterator_property_map<array_ref_type::iterator,
1801                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1802   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1803                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1804 
1805   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1806 
1807   PetscFunctionReturn(0);
1808 }
1809 #endif
1810 
typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
  PetscInt       nzlocal,nsends,nrecvs;       /* local nonzero count; number of sends/receives */
  PetscMPIInt    *send_rank;                  /* destination ranks; receive ranks are stored immediately after (recv_rank = send_rank + size) */
  PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;   /* send buffer of nz counts; send/recv buffers of row pointers + column indices */
  PetscScalar    *sbuf_a,**rbuf_a;            /* send/recv buffers of matrix values */
  PetscErrorCode (*MatDestroy)(Mat);          /* original destroy routine, reinstalled by MatDestroy_MatRedundant() */
} Mat_Redundant;
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1821 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1822 {
1823   PetscErrorCode       ierr;
1824   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1825   PetscInt             i;
1826 
1827   PetscFunctionBegin;
1828   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1829   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1830   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1831   for (i=0; i<redund->nrecvs; i++){
1832     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1833     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1834   }
1835   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1836   ierr = PetscFree(redund);CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatDestroy_MatRedundant"
1842 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1843 {
1844   PetscErrorCode  ierr;
1845   PetscContainer  container;
1846   Mat_Redundant   *redund=PETSC_NULL;
1847 
1848   PetscFunctionBegin;
1849   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1850   if (container) {
1851     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1852   } else {
1853     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1854   }
1855   A->ops->destroy = redund->MatDestroy;
1856   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1857   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1858   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1859   PetscFunctionReturn(0);
1860 }
1861 
#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
/*
   Builds, on each of nsubcomm sub-communicators, a complete (redundant) copy
   C of the parallel matrix mat.  Each process packs its local rows into
   sbuf_j (row pointers followed by global column indices) and sbuf_a
   (values), exchanges these buffers with the other processes of its
   subcommunicator in three tagged rounds (nz counts, indices, values), and
   assembles C via MatSetValues().  With MAT_REUSE_MATRIX all buffers are
   recovered from the Mat_Redundant container attached to C, and only the
   values round is repeated.
*/
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
{
  PetscMPIInt    rank,size;
  MPI_Comm       comm=mat->comm;
  PetscErrorCode ierr;
  PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
  PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
  PetscInt       *rowrange=mat->rmap.range;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A=aij->A,B=aij->B,C=*matredundant;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
  PetscScalar    *sbuf_a;
  PetscInt       nzlocal=a->nz+b->nz;
  PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
  PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
  PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
  PetscScalar    *vals,*aworkA,*aworkB;
  PetscMPIInt    tag1,tag2,tag3,imdex;
  MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
                 *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
  MPI_Status     recv_status,*send_status;
  PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
  PetscInt       **rbuf_j=PETSC_NULL;
  PetscScalar    **rbuf_a=PETSC_NULL;
  Mat_Redundant  *redund=PETSC_NULL;
  PetscContainer container;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (reuse == MAT_REUSE_MATRIX) {
    /* validate the reused matrix and recover all communication buffers */
    ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
    ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
    if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
    ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
    if (container) {
      ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
    }
    if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");

    nsends    = redund->nsends;
    nrecvs    = redund->nrecvs;
    send_rank = redund->send_rank; recv_rank = send_rank + size;
    sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
    sbuf_j    = redund->sbuf_j;
    sbuf_a    = redund->sbuf_a;
    rbuf_j    = redund->rbuf_j;
    rbuf_a    = redund->rbuf_a;
  }

  if (reuse == MAT_INITIAL_MATRIX){
    PetscMPIInt  subrank,subsize;
    PetscInt     nleftover,np_subcomm;
    /* get the destination processors' id send_rank, nsends and nrecvs */
    ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
    ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
    /* NOTE(review): this PetscMalloc return code is not CHKERRQ-checked */
    ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
    recv_rank = send_rank + size;
    np_subcomm = size/nsubcomm;
    nleftover  = size - nsubcomm*np_subcomm;
    nsends = 0; nrecvs = 0;
    for (i=0; i<size; i++){ /* i=rank*/
      if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
        send_rank[nsends] = i; nsends++;
        recv_rank[nrecvs++] = i;
      }
    }
    if (rank >= size - nleftover){/* this proc is a leftover processor */
      i = size-nleftover-1;
      j = 0;
      while (j < nsubcomm - nleftover){
        send_rank[nsends++] = i;
        i--; j++;
      }
    }

    if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
      for (i=0; i<nleftover; i++){
        recv_rank[nrecvs++] = size-nleftover+i;
      }
    }

    /* allocate sbuf_j, sbuf_a */
    i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
    ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
    ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* copy mat's local entries into the buffers */
  if (reuse == MAT_INITIAL_MATRIX){
    /* pack both row pointers + column indices (global numbering, in
       ascending column order: lower off-diag, diag, upper off-diag)
       and the matching values */
    rownz_max = 0;
    rptr = sbuf_j;
    cols = sbuf_j + rend-rstart + 1;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols  = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      /* load the column indices for this row into cols */
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart){
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      for (l=0; l<nzA; l++){
        vals[lwrite]   = aworkA[l];
        cols[lwrite++] = cstart + cworkA[l];
      }
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend){
          vals[lwrite]   = aworkB[l];
          cols[lwrite++] = ctmp;
        }
      }
      vals += ncols;
      cols += ncols;
      rptr[i+1] = rptr[i] + ncols;
      if (rownz_max < ncols) rownz_max = ncols;
    }
    /* NOTE(review): the message prints rptr[rend-rstart+1] although the test
       checks rptr[rend-rstart] -- likely an off-by-one in the diagnostic */
    if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
  } else { /* only copy matrix values into sbuf_a */
    rptr = sbuf_j;
    vals = sbuf_a;
    rptr[0] = 0;
    for (i=0; i<rend-rstart; i++){
      row = i + rstart;
      nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
      ncols  = nzA + nzB;
      cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
      aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
      lwrite = 0;
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
      }
      for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
      for (l=0; l<nzB; l++) {
        if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
      }
      vals += ncols;
      rptr[i+1] = rptr[i] + ncols;
    }
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* send nzlocal to others, and recv other's nzlocal */
  /*--------------------------------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* one contiguous allocation holds all six request arrays */
    ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    s_waits2 = s_waits3 + nsends;
    s_waits1 = s_waits2 + nsends;
    r_waits1 = s_waits1 + nsends;
    r_waits2 = r_waits1 + nrecvs;
    r_waits3 = r_waits2 + nrecvs;
  } else {
    /* on reuse only the value round (tag3) is needed */
    ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
    r_waits3 = s_waits3 + nsends;
  }

  ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX){
    /* get new tags to keep the communication clean */
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
    ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
    rbuf_nz = sbuf_nz + nsends;

    /* post receives of other's nzlocal */
    for (i=0; i<nrecvs; i++){
      ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
    }
    /* send nzlocal to others */
    for (i=0; i<nsends; i++){
      sbuf_nz[i] = nzlocal;
      ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
    }
    /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
      recv_rank[imdex] = recv_status.MPI_SOURCE;
      /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
      ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);

      i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
      rbuf_nz[imdex] += i + 2;
      ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
      ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
      count--;
    }
    /* wait on sends of nzlocal */
    if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
    /* send mat->i,j to others, and recv from other's */
    /*------------------------------------------------*/
    for (i=0; i<nsends; i++){
      j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
      ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
    }
    /* wait on receives of mat->i,j */
    /*------------------------------*/
    count = nrecvs;
    while (count) {
      ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
      if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
      count--;
    }
    /* wait on sends of mat->i,j */
    /*---------------------------*/
    if (nsends) {
      ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
    }
  } /* endof if (reuse == MAT_INITIAL_MATRIX) */

  /* post receives, send and receive mat->a */
  /*----------------------------------------*/
  for (imdex=0; imdex<nrecvs; imdex++) {
    ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
  }
  for (i=0; i<nsends; i++){
    ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
  }
  count = nrecvs;
  while (count) {
    ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
    if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
    count--;
  }
  if (nsends) {
    ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
  }

  ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);

  /* create redundant matrix */
  /*-------------------------*/
  if (reuse == MAT_INITIAL_MATRIX){
    /* compute rownz_max for preallocation */
    for (imdex=0; imdex<nrecvs; imdex++){
      j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
      rptr = rbuf_j[imdex];
      for (i=0; i<j; i++){
        ncols = rptr[i+1] - rptr[i];
        if (rownz_max < ncols) rownz_max = ncols;
      }
    }

    ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
    ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
    ierr = MatSetFromOptions(C);CHKERRQ(ierr);
    /* both preallocations are set; only the one matching C's type takes effect */
    ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
  } else {
    C = *matredundant;
  }

  /* insert local matrix entries */
  rptr = sbuf_j;
  cols = sbuf_j + rend-rstart + 1;
  vals = sbuf_a;
  for (i=0; i<rend-rstart; i++){
    row   = i + rstart;
    ncols = rptr[i+1] - rptr[i];
    ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
    vals += ncols;
    cols += ncols;
  }
  /* insert received matrix entries */
  for (imdex=0; imdex<nrecvs; imdex++){
    rstart = rowrange[recv_rank[imdex]];
    rend   = rowrange[recv_rank[imdex]+1];
    rptr = rbuf_j[imdex];
    cols = rbuf_j[imdex] + rend-rstart + 1;
    vals = rbuf_a[imdex];
    for (i=0; i<rend-rstart; i++){
      row   = i + rstart;
      ncols = rptr[i+1] - rptr[i];
      ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      vals += ncols;
      cols += ncols;
    }
  }
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
  if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
  if (reuse == MAT_INITIAL_MATRIX){
    PetscContainer container;
    *matredundant = C;
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
    ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
    ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
    ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);

    redund->nzlocal = nzlocal;
    redund->nsends  = nsends;
    redund->nrecvs  = nrecvs;
    redund->send_rank = send_rank;
    redund->sbuf_nz = sbuf_nz;
    redund->sbuf_j  = sbuf_j;
    redund->sbuf_a  = sbuf_a;
    redund->rbuf_j  = rbuf_j;
    redund->rbuf_a  = rbuf_a;

    /* hook our destroy routine in front of the type's own */
    redund->MatDestroy = C->ops->destroy;
    C->ops->destroy    = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}
2181 
2182 /* -------------------------------------------------------------------*/
/* Operation table for the MPIAIJ matrix type.  The slot numbers in the
   margin comments refer to the members of struct _MatOps, in declaration
   order; a 0 entry means the operation is not implemented for this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
       MatGetRow_MPIAIJ,
       MatRestoreRow_MPIAIJ,
       MatMult_MPIAIJ,
/* 4*/ MatMultAdd_MPIAIJ,
       MatMultTranspose_MPIAIJ,
       MatMultTransposeAdd_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatSolve_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPIAIJ,
       MatTranspose_MPIAIJ,
/*15*/ MatGetInfo_MPIAIJ,
       MatEqual_MPIAIJ,
       MatGetDiagonal_MPIAIJ,
       MatDiagonalScale_MPIAIJ,
       MatNorm_MPIAIJ,
/*20*/ MatAssemblyBegin_MPIAIJ,
       MatAssemblyEnd_MPIAIJ,
       0,
       MatSetOption_MPIAIJ,
       MatZeroEntries_MPIAIJ,
/*25*/ MatZeroRows_MPIAIJ,
       0,
#ifdef PETSC_HAVE_PBGL
       MatLUFactorNumeric_MPIAIJ,
#else
       0,
#endif
       0,
       0,
/*30*/ MatSetUpPreallocation_MPIAIJ,
#ifdef PETSC_HAVE_PBGL
       MatILUFactorSymbolic_MPIAIJ,
#else
       0,
#endif
       0,
       0,
       0,
/*35*/ MatDuplicate_MPIAIJ,
       0,
       0,
       0,
       0,
/*40*/ MatAXPY_MPIAIJ,
       MatGetSubMatrices_MPIAIJ,
       MatIncreaseOverlap_MPIAIJ,
       MatGetValues_MPIAIJ,
       MatCopy_MPIAIJ,
/*45*/ 0,
       MatScale_MPIAIJ,
       0,
       0,
       0,
/*50*/ MatSetBlockSize_MPIAIJ,
       0,
       0,
       0,
       0,
/*55*/ MatFDColoringCreate_MPIAIJ,
       0,
       MatSetUnfactored_MPIAIJ,
       MatPermute_MPIAIJ,
       0,
/*60*/ MatGetSubMatrix_MPIAIJ,
       MatDestroy_MPIAIJ,
       MatView_MPIAIJ,
       0,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ 0,
       0,
       MatSetColoring_MPIAIJ,
#if defined(PETSC_HAVE_ADIC)
       MatSetValuesAdic_MPIAIJ,
#else
       0,
#endif
       MatSetValuesAdifor_MPIAIJ,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
/*84*/ MatLoad_MPIAIJ,
       0,
       0,
       0,
       0,
       0,
/*90*/ MatMatMult_MPIAIJ_MPIAIJ,
       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
       MatMatMultNumeric_MPIAIJ_MPIAIJ,
       MatPtAP_Basic,
       MatPtAPSymbolic_MPIAIJ,
/*95*/ MatPtAPNumeric_MPIAIJ,
       0,
       0,
       0,
       0,
/*100*/0,
       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
       MatPtAPNumeric_MPIAIJ_MPIAIJ,
       MatConjugate_MPIAIJ,
       0,
/*105*/MatSetValuesRow_MPIAIJ,
       MatRealPart_MPIAIJ,
       MatImaginaryPart_MPIAIJ,
       0,
       0,
/*110*/0,
       MatGetRedundantMatrix_MPIAIJ};
2311 
2312 /* ----------------------------------------------------------------------------------------*/
2313 
2314 EXTERN_C_BEGIN
2315 #undef __FUNCT__
2316 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2317 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2318 {
2319   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2320   PetscErrorCode ierr;
2321 
2322   PetscFunctionBegin;
2323   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2324   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2325   PetscFunctionReturn(0);
2326 }
2327 EXTERN_C_END
2328 
2329 EXTERN_C_BEGIN
2330 #undef __FUNCT__
2331 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2332 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2333 {
2334   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2335   PetscErrorCode ierr;
2336 
2337   PetscFunctionBegin;
2338   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2339   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2340   PetscFunctionReturn(0);
2341 }
2342 EXTERN_C_END
2343 
2344 #include "petscpc.h"
2345 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/*
   Type-specific implementation of MatMPIAIJSetPreallocation().

   d_nz/d_nnz preallocate the diagonal block (the square submatrix of
   locally owned rows and columns); o_nz/o_nnz preallocate the off-diagonal
   block.  PETSC_DEFAULT/PETSC_DECIDE fall back to 5 and 2 nonzeros per row
   respectively.  The two sequential blocks b->A (n x n local) and b->B
   (local rows by global columns; column-compressed later at assembly) are
   created here as MATSEQAIJ matrices.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  /* replace the "decide for me" sentinels with the historical defaults */
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  /* finalize the row/column layouts before sizing the blocks */
  B->rmap.bs = B->cmap.bs = 1;
  ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
  /* validate per-row counts, if given */
  if (d_nnz) {
    for (i=0; i<B->rmap.n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap.n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
  ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
2391 EXTERN_C_END
2392 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
  Creates a new parallel AIJ matrix with the same layout as matin; cpvalues is
  forwarded to the MatDuplicate() calls on the sequential blocks, which decide
  whether the numerical values are copied as well.

  Output: newmat - the duplicate matrix (returned assembled)
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
  /* copy the entire function table so any overridden operations carry over */
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a    = (Mat_MPIAIJ*)mat->data;

  /* the duplicate is created directly in the assembled state */
  mat->factor       = matin->factor;
  mat->rmap.bs      = matin->rmap.bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size           = oldmat->size;
  a->rank           = oldmat->rank;
  a->donotstash     = oldmat->donotstash;
  a->roworiented    = oldmat->roworiented;
  /* MatGetRow() scratch space is not copied; it is rebuilt on demand */
  a->rowindices     = 0;
  a->rowvalues      = 0;
  a->getrowactive   = PETSC_FALSE;

  ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  /* duplicate the global-to-local column map of the off-diagonal part, if built */
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* duplicate the array of global column numbers of the off-diagonal part, if built */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap.n;
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the communication machinery and the two sequential blocks */
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2456 
2457 #include "petscsys.h"
2458 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
  Loads a matrix stored in PETSc binary format from the viewer and builds a
  parallel matrix of the given type.  Process 0 reads the whole file and ships
  each process its block of rows in three passes: row lengths, then column
  indices, then numerical values.

  Output: newmat - the assembled matrix
*/
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
  PetscInt       i,nz,j,rstart,rend,mmax;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* process 0 reads and validates the file header */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows; earlier ranks get the extra rows */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax       = rowners[1];
    /* NOTE(review): rowners[size] (the last process's count) is never examined
       here; safe because the m computed above is non-increasing with rank so
       rowners[1] is already the maximum, but confirm if the distribution
       formula ever changes */
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  /* convert per-process row counts into a prefix sum of ownership ranges */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix; preallocation wants the diagonal-only counts, so
     temporarily subtract the off-diagonal counts from ourlens */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  /* restore ourlens to the full per-row lengths for the insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
2653 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

    isrow/iscol select the rows/columns to extract; csize is the requested
  local column size of the result (or PETSC_DECIDE); with MAT_REUSE_MATRIX
  the sequential submatrix cached on *newmat from a previous call is reused.
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  PetscScalar    *vwork,*aa;
  MPI_Comm       comm = mat->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* obtain the local sequential submatrix: either recycle the one cached on
     *newmat by a previous MAT_INITIAL_MATRIX call, or create a fresh one */
  if (call ==  MAT_REUSE_MATRIX) {
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
	nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column sizes to get this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* copy the rows of the sequential submatrix into the parallel result */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
2780 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
/*
  Type-specific implementation of MatMPIAIJSetPreallocationCSR().

  Ii - row pointers into J for the local rows; Ii[0] must be 0
  J  - global column indices, sorted within each row (MAT_COLUMNS_SORTED
       is set before the insertion loop below)
  v  - optional numerical values; when PETSC_NULL explicit zeros are inserted

  Counts per row how many entries fall in the diagonal block (columns in
  [cstart,cend)) versus the off-diagonal block, preallocates accordingly,
  then inserts the values row by row and assembles the matrix.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);

  B->rmap.bs = B->cmap.bs = 1;
  ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
  m      = B->rmap.n;
  cstart = B->cmap.rstart;
  cend   = B->cmap.rend;
  rstart = B->rmap.rstart;

  /* one allocation holds both count arrays: d_nnz first, then o_nnz */
  ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

  for (i=0; i<m; i++) {
    nnz     = Ii[i+1]- Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max,nnz);
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    /* columns are sorted: skip the entries left of the diagonal block ... */
    for (j=0; j<nnz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    /* ... then count entries inside [cstart,cend); the remainder is off-diagonal */
    for (; j<nnz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);

  /* with no values supplied, insert explicit zeros from one zeroed buffer
     (reused for every row, hence the nnz_max sizing) */
  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = Ii[i+1]- Ii[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
2848 
2849 #undef __FUNCT__
2850 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2851 /*@
2852    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2853    (the default parallel PETSc format).
2854 
2855    Collective on MPI_Comm
2856 
2857    Input Parameters:
2858 +  B - the matrix
2859 .  i - the indices into j for the start of each local row (starts with zero)
2860 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2861 -  v - optional values in the matrix
2862 
2863    Level: developer
2864 
2865    Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal
2866      storage format. Thus changing the values in a[] after this call will not effect the matrix values.
2867 
2868 .keywords: matrix, aij, compressed row, sparse, parallel
2869 
2870 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2871           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2872 @*/
2873 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2874 {
2875   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2876 
2877   PetscFunctionBegin;
2878   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2879   if (f) {
2880     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2881   }
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 #undef __FUNCT__
2886 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2887 /*@C
2888    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2889    (the default parallel PETSc format).  For good matrix assembly performance
2890    the user should preallocate the matrix storage by setting the parameters
2891    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2892    performance can be increased by more than a factor of 50.
2893 
2894    Collective on MPI_Comm
2895 
2896    Input Parameters:
2897 +  A - the matrix
2898 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2899            (same value is used for all local rows)
2900 .  d_nnz - array containing the number of nonzeros in the various rows of the
2901            DIAGONAL portion of the local submatrix (possibly different for each row)
2902            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2903            The size of this array is equal to the number of local rows, i.e 'm'.
2904            You must leave room for the diagonal entry even if it is zero.
2905 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2906            submatrix (same value is used for all local rows).
2907 -  o_nnz - array containing the number of nonzeros in the various rows of the
2908            OFF-DIAGONAL portion of the local submatrix (possibly different for
2909            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2910            structure. The size of this array is equal to the number
2911            of local rows, i.e 'm'.
2912 
2913    If the *_nnz parameter is given then the *_nz parameter is ignored
2914 
2915    The AIJ format (also called the Yale sparse matrix format or
2916    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2917    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2918 
2919    The parallel matrix is partitioned such that the first m0 rows belong to
2920    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2921    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2922 
2923    The DIAGONAL portion of the local submatrix of a processor can be defined
2924    as the submatrix which is obtained by extraction the part corresponding
2925    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2926    first row that belongs to the processor, and r2 is the last row belonging
2927    to the this processor. This is a square mxm matrix. The remaining portion
2928    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2929 
2930    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2931 
2932    Example usage:
2933 
2934    Consider the following 8x8 matrix with 34 non-zero values, that is
2935    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2936    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2937    as follows:
2938 
2939 .vb
2940             1  2  0  |  0  3  0  |  0  4
2941     Proc0   0  5  6  |  7  0  0  |  8  0
2942             9  0 10  | 11  0  0  | 12  0
2943     -------------------------------------
2944            13  0 14  | 15 16 17  |  0  0
2945     Proc1   0 18  0  | 19 20 21  |  0  0
2946             0  0  0  | 22 23  0  | 24  0
2947     -------------------------------------
2948     Proc2  25 26 27  |  0  0 28  | 29  0
2949            30  0  0  | 31 32 33  |  0 34
2950 .ve
2951 
2952    This can be represented as a collection of submatrices as:
2953 
2954 .vb
2955       A B C
2956       D E F
2957       G H I
2958 .ve
2959 
2960    Where the submatrices A,B,C are owned by proc0, D,E,F are
2961    owned by proc1, G,H,I are owned by proc2.
2962 
2963    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2964    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2965    The 'M','N' parameters are 8,8, and have the same values on all procs.
2966 
2967    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2968    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2969    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2970    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2971    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2972    matrix, ans [DF] as another SeqAIJ matrix.
2973 
2974    When d_nz, o_nz parameters are specified, d_nz storage elements are
2975    allocated for every row of the local diagonal submatrix, and o_nz
2976    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2977    One way to choose d_nz and o_nz is to use the max nonzerors per local
2978    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2979    In this case, the values of d_nz,o_nz are:
2980 .vb
2981      proc0 : dnz = 2, o_nz = 2
2982      proc1 : dnz = 3, o_nz = 2
2983      proc2 : dnz = 1, o_nz = 4
2984 .ve
2985    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2986    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2987    for proc3. i.e we are using 12+15+10=37 storage locations to store
2988    34 values.
2989 
2990    When d_nnz, o_nnz parameters are specified, the storage is specified
2991    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2992    In the above case the values for d_nnz,o_nnz are:
2993 .vb
2994      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2995      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2996      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2997 .ve
2998    Here the space allocated is sum of all the above values i.e 34, and
2999    hence pre-allocation is perfect.
3000 
3001    Level: intermediate
3002 
3003 .keywords: matrix, aij, compressed row, sparse, parallel
3004 
3005 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3006           MPIAIJ
3007 @*/
3008 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3009 {
3010   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3011 
3012   PetscFunctionBegin;
3013   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3014   if (f) {
3015     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3016   }
3017   PetscFunctionReturn(0);
3018 }
3019 
3020 #undef __FUNCT__
3021 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3022 /*@C
3023      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3024          CSR format the local rows.
3025 
3026    Collective on MPI_Comm
3027 
3028    Input Parameters:
3029 +  comm - MPI communicator
3030 .  m - number of local rows (Cannot be PETSC_DECIDE)
3031 .  n - This value should be the same as the local size used in creating the
3032        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3033        calculated if N is given) For square matrices n is almost always m.
3034 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3035 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3036 .   i - row indices
3037 .   j - column indices
3038 -   a - matrix values
3039 
3040    Output Parameter:
3041 .   mat - the matrix
3042 
3043    Level: intermediate
3044 
3045    Notes:
3046        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3047      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3048      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3049 
3050        The i and j indices are 0 based
3051 
3052 .keywords: matrix, aij, compressed row, sparse, parallel
3053 
3054 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3055           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3056 @*/
3057 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3058 {
3059   PetscErrorCode ierr;
3060 
3061  PetscFunctionBegin;
3062   if (i[0]) {
3063     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3064   }
3065   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3066   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3067   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3068   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3069   PetscFunctionReturn(0);
3070 }
3071 
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatCreateMPIAIJ"
3074 /*@C
3075    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3076    (the default parallel PETSc format).  For good matrix assembly performance
3077    the user should preallocate the matrix storage by setting the parameters
3078    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3079    performance can be increased by more than a factor of 50.
3080 
3081    Collective on MPI_Comm
3082 
3083    Input Parameters:
3084 +  comm - MPI communicator
3085 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3086            This value should be the same as the local size used in creating the
3087            y vector for the matrix-vector product y = Ax.
3088 .  n - This value should be the same as the local size used in creating the
3089        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3090        calculated if N is given) For square matrices n is almost always m.
3091 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3092 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3093 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3094            (same value is used for all local rows)
3095 .  d_nnz - array containing the number of nonzeros in the various rows of the
3096            DIAGONAL portion of the local submatrix (possibly different for each row)
3097            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3098            The size of this array is equal to the number of local rows, i.e 'm'.
3099            You must leave room for the diagonal entry even if it is zero.
3100 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3101            submatrix (same value is used for all local rows).
3102 -  o_nnz - array containing the number of nonzeros in the various rows of the
3103            OFF-DIAGONAL portion of the local submatrix (possibly different for
3104            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3105            structure. The size of this array is equal to the number
3106            of local rows, i.e 'm'.
3107 
3108    Output Parameter:
3109 .  A - the matrix
3110 
3111    Notes:
3112    If the *_nnz parameter is given then the *_nz parameter is ignored
3113 
3114    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3115    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3116    storage requirements for this matrix.
3117 
3118    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3119    processor than it must be used on all processors that share the object for
3120    that argument.
3121 
3122    The user MUST specify either the local or global matrix dimensions
3123    (possibly both).
3124 
3125    The parallel matrix is partitioned across processors such that the
3126    first m0 rows belong to process 0, the next m1 rows belong to
3127    process 1, the next m2 rows belong to process 2 etc.. where
3128    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
3129    values corresponding to [m x N] submatrix.
3130 
3131    The columns are logically partitioned with the n0 columns belonging
3132    to 0th partition, the next n1 columns belonging to the next
   partition etc. where n0,n1,n2... are the input parameter 'n'.
3134 
3135    The DIAGONAL portion of the local submatrix on any given processor
3136    is the submatrix corresponding to the rows and columns m,n
3137    corresponding to the given processor. i.e diagonal matrix on
3138    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3139    etc. The remaining portion of the local submatrix [m x (N-n)]
3140    constitute the OFF-DIAGONAL portion. The example below better
3141    illustrates this concept.
3142 
3143    For a square global matrix we define each processor's diagonal portion
3144    to be its local rows and the corresponding columns (a square submatrix);
3145    each processor's off-diagonal portion encompasses the remainder of the
3146    local matrix (a rectangular submatrix).
3147 
3148    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3149 
3150    When calling this routine with a single process communicator, a matrix of
3151    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3152    type of communicator, use the construction mechanism:
3153      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3154 
3155    By default, this format uses inodes (identical nodes) when possible.
3156    We search for consecutive rows with the same nonzero structure, thereby
3157    reusing matrix information to achieve increased efficiency.
3158 
3159    Options Database Keys:
3160 +  -mat_no_inode  - Do not use inodes
3161 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3162 -  -mat_aij_oneindex - Internally use indexing starting at 1
3163         rather than 0.  Note that when calling MatSetValues(),
3164         the user still MUST index entries starting at 0!
3165 
3166 
3167    Example usage:
3168 
3169    Consider the following 8x8 matrix with 34 non-zero values, that is
3170    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3171    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3172    as follows:
3173 
3174 .vb
3175             1  2  0  |  0  3  0  |  0  4
3176     Proc0   0  5  6  |  7  0  0  |  8  0
3177             9  0 10  | 11  0  0  | 12  0
3178     -------------------------------------
3179            13  0 14  | 15 16 17  |  0  0
3180     Proc1   0 18  0  | 19 20 21  |  0  0
3181             0  0  0  | 22 23  0  | 24  0
3182     -------------------------------------
3183     Proc2  25 26 27  |  0  0 28  | 29  0
3184            30  0  0  | 31 32 33  |  0 34
3185 .ve
3186 
3187    This can be represented as a collection of submatrices as:
3188 
3189 .vb
3190       A B C
3191       D E F
3192       G H I
3193 .ve
3194 
3195    Where the submatrices A,B,C are owned by proc0, D,E,F are
3196    owned by proc1, G,H,I are owned by proc2.
3197 
3198    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3199    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3200    The 'M','N' parameters are 8,8, and have the same values on all procs.
3201 
3202    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3203    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3204    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3205    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
3208 
3209    When d_nz, o_nz parameters are specified, d_nz storage elements are
3210    allocated for every row of the local diagonal submatrix, and o_nz
3211    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
3213    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3214    In this case, the values of d_nz,o_nz are:
3215 .vb
3216      proc0 : dnz = 2, o_nz = 2
3217      proc1 : dnz = 3, o_nz = 2
3218      proc2 : dnz = 1, o_nz = 4
3219 .ve
3220    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3221    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
3223    34 values.
3224 
3225    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3227    In the above case the values for d_nnz,o_nnz are:
3228 .vb
3229      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3230      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3231      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3232 .ve
3233    Here the space allocated is sum of all the above values i.e 34, and
3234    hence pre-allocation is perfect.
3235 
3236    Level: intermediate
3237 
3238 .keywords: matrix, aij, compressed row, sparse, parallel
3239 
3240 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3241           MPIAIJ, MatCreateMPIAIJWithArrays()
3242 @*/
3243 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3244 {
3245   PetscErrorCode ierr;
3246   PetscMPIInt    size;
3247 
3248   PetscFunctionBegin;
3249   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3250   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3251   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3252   if (size > 1) {
3253     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3254     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3255   } else {
3256     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3257     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3258   }
3259   PetscFunctionReturn(0);
3260 }
3261 
3262 #undef __FUNCT__
3263 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3264 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3265 {
3266   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3267 
3268   PetscFunctionBegin;
3269   *Ad     = a->A;
3270   *Ao     = a->B;
3271   *colmap = a->garray;
3272   PetscFunctionReturn(0);
3273 }
3274 
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   Transfers a column coloring of the parallel matrix onto its two sequential
   blocks (diagonal part a->A and off-diagonal part a->B), calling
   MatSetColoring_SeqAIJ() on each with a coloring expressed in that block's
   local column numbering. Supports IS_COLORING_GLOBAL and IS_COLORING_GHOSTED.
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_GLOBAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather the colors of all global
       columns, then pick out the ones used by this process's off-diagonal
       block (garray[] maps local off-diagonal columns to global columns) */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    /* ocoloring takes ownership of colors[] */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: map each owned global column back
       into the ghosted local numbering the given coloring is expressed in */
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      larray[i] = i + A->cmap.rstart;  /* global column number of local col i */
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion; garray[] already holds the
       global column numbers, so it is fed directly to the mapping */
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    /* NOTE(review): MPI_COMM_SELF here vs PETSC_COMM_SELF above -- presumably
       equivalent for a serial coloring, but worth confirming/unifying */
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}
3338 
#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forwards ADIC-computed values to both sequential blocks of the MPIAIJ
   matrix (diagonal part first, then the off-diagonal part). */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(aij->A,advalues);CHKERRQ(ierr); /* diagonal block */
  ierr = MatSetValuesAdic_SeqAIJ(aij->B,advalues);CHKERRQ(ierr); /* off-diagonal block */
  PetscFunctionReturn(0);
}
#endif
3353 
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3356 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3357 {
3358   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3359   PetscErrorCode ierr;
3360 
3361   PetscFunctionBegin;
3362   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3363   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3364   PetscFunctionReturn(0);
3365 }
3366 
3367 #undef __FUNCT__
3368 #define __FUNCT__ "MatMerge"
3369 /*@C
      MatMerge - Creates a single large PETSc matrix by concatenating sequential
3371                  matrices from each processor
3372 
3373     Collective on MPI_Comm
3374 
3375    Input Parameters:
3376 +    comm - the communicators the parallel matrix will live on
3377 .    inmat - the input sequential matrices
3378 .    n - number of local columns (or PETSC_DECIDE)
3379 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3380 
3381    Output Parameter:
3382 .    outmat - the parallel matrix generated
3383 
3384     Level: advanced
3385 
3386    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3387 
3388 @*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);  /* local piece is m x N */
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* prefix-sum of the local row counts gives this process's first global row */
    ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* insert the local rows into the parallel matrix, shifted to global numbering */
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii    = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  /* NOTE: the input matrix is consumed -- it is destroyed here */
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
3437 
3438 #undef __FUNCT__
3439 #define __FUNCT__ "MatFileSplit"
3440 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3441 {
3442   PetscErrorCode    ierr;
3443   PetscMPIInt       rank;
3444   PetscInt          m,N,i,rstart,nnz;
3445   size_t            len;
3446   const PetscInt    *indx;
3447   PetscViewer       out;
3448   char              *name;
3449   Mat               B;
3450   const PetscScalar *values;
3451 
3452   PetscFunctionBegin;
3453   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3454   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3455   /* Should this be the type of the diagonal block of A? */
3456   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3457   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3458   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3459   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3460   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3461   for (i=0;i<m;i++) {
3462     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3463     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3464     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3465   }
3466   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3467   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3468 
3469   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3470   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3471   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3472   sprintf(name,"%s.%d",outfile,rank);
3473   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3474   ierr = PetscFree(name);
3475   ierr = MatView(B,out);CHKERRQ(ierr);
3476   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3477   ierr = MatDestroy(B);CHKERRQ(ierr);
3478   PetscFunctionReturn(0);
3479 }
3480 
3481 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3482 #undef __FUNCT__
3483 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3484 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3485 {
3486   PetscErrorCode       ierr;
3487   Mat_Merge_SeqsToMPI  *merge;
3488   PetscContainer       container;
3489 
3490   PetscFunctionBegin;
3491   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3492   if (container) {
3493     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3494     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3495     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3496     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3497     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3498     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3499     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3500     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3501     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3502     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3503     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3504     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3505 
3506     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3507     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3508   }
3509   ierr = PetscFree(merge);CHKERRQ(ierr);
3510 
3511   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3512   PetscFunctionReturn(0);
3513 }
3514 
3515 #include "src/mat/utils/freespace.h"
3516 #include "petscbt.h"
3517 static PetscEvent logkey_seqstompinum = 0;
3518 #undef __FUNCT__
3519 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3520 /*@C
3521       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3522                  matrices from each processor
3523 
3524     Collective on MPI_Comm
3525 
   Input Parameters:
+    seqmat - the local sequential matrix supplying the numerical values
-    mpimat - the parallel matrix created by MatMerge_SeqsToMPISymbolic()

   Output Parameter:
.    mpimat - the parallel matrix with its numerical values assembled
3535 
3536     Level: advanced
3537 
3538    Notes:
3539      The dimensions of the sequential matrix in each processor MUST be the same.
3540      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3541      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3542 @*/
3543 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3544 {
3545   PetscErrorCode       ierr;
3546   MPI_Comm             comm=mpimat->comm;
3547   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3548   PetscMPIInt          size,rank,taga,*len_s;
3549   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3550   PetscInt             proc,m;
3551   PetscInt             **buf_ri,**buf_rj;
3552   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3553   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3554   MPI_Request          *s_waits,*r_waits;
3555   MPI_Status           *status;
3556   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3557   Mat_Merge_SeqsToMPI  *merge;
3558   PetscContainer       container;
3559 
3560   PetscFunctionBegin;
3561   if (!logkey_seqstompinum) {
3562     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3563   }
3564   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3565 
3566   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3567   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3568 
3569   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3570   if (container) {
3571     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3572   }
3573   bi     = merge->bi;
3574   bj     = merge->bj;
3575   buf_ri = merge->buf_ri;
3576   buf_rj = merge->buf_rj;
3577 
3578   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3579   owners = merge->rowmap.range;
3580   len_s  = merge->len_s;
3581 
3582   /* send and recv matrix values */
3583   /*-----------------------------*/
3584   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3585   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3586 
3587   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3588   for (proc=0,k=0; proc<size; proc++){
3589     if (!len_s[proc]) continue;
3590     i = owners[proc];
3591     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3592     k++;
3593   }
3594 
3595   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3596   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3597   ierr = PetscFree(status);CHKERRQ(ierr);
3598 
3599   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3600   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3601 
3602   /* insert mat values of mpimat */
3603   /*----------------------------*/
3604   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3605   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3606   nextrow = buf_ri_k + merge->nrecv;
3607   nextai  = nextrow + merge->nrecv;
3608 
3609   for (k=0; k<merge->nrecv; k++){
3610     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3611     nrows = *(buf_ri_k[k]);
3612     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3613     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3614   }
3615 
3616   /* set values of ba */
3617   m = merge->rowmap.n;
3618   for (i=0; i<m; i++) {
3619     arow = owners[rank] + i;
3620     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3621     bnzi = bi[i+1] - bi[i];
3622     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3623 
3624     /* add local non-zero vals of this proc's seqmat into ba */
3625     anzi = ai[arow+1] - ai[arow];
3626     aj   = a->j + ai[arow];
3627     aa   = a->a + ai[arow];
3628     nextaj = 0;
3629     for (j=0; nextaj<anzi; j++){
3630       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3631         ba_i[j] += aa[nextaj++];
3632       }
3633     }
3634 
3635     /* add received vals into ba */
3636     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3637       /* i-th row */
3638       if (i == *nextrow[k]) {
3639         anzi = *(nextai[k]+1) - *nextai[k];
3640         aj   = buf_rj[k] + *(nextai[k]);
3641         aa   = abuf_r[k] + *(nextai[k]);
3642         nextaj = 0;
3643         for (j=0; nextaj<anzi; j++){
3644           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3645             ba_i[j] += aa[nextaj++];
3646           }
3647         }
3648         nextrow[k]++; nextai[k]++;
3649       }
3650     }
3651     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3652   }
3653   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3654   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3655 
3656   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3657   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3658   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3659   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3660   PetscFunctionReturn(0);
3661 }
3662 
3663 static PetscEvent logkey_seqstompisym = 0;
3664 #undef __FUNCT__
3665 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3666 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3667 {
3668   PetscErrorCode       ierr;
3669   Mat                  B_mpi;
3670   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3671   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3672   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3673   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3674   PetscInt             len,proc,*dnz,*onz;
3675   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3676   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3677   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3678   MPI_Status           *status;
3679   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3680   PetscBT              lnkbt;
3681   Mat_Merge_SeqsToMPI  *merge;
3682   PetscContainer       container;
3683 
3684   PetscFunctionBegin;
3685   if (!logkey_seqstompisym) {
3686     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3687   }
3688   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3689 
3690   /* make sure it is a PETSc comm */
3691   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3692   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3693   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3694 
3695   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3696   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3697 
3698   /* determine row ownership */
3699   /*---------------------------------------------------------*/
3700   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3701   merge->rowmap.n = m;
3702   merge->rowmap.N = M;
3703   merge->rowmap.bs = 1;
3704   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3705   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3706   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3707 
3708   m      = merge->rowmap.n;
3709   M      = merge->rowmap.N;
3710   owners = merge->rowmap.range;
3711 
3712   /* determine the number of messages to send, their lengths */
3713   /*---------------------------------------------------------*/
3714   len_s  = merge->len_s;
3715 
3716   len = 0;  /* length of buf_si[] */
3717   merge->nsend = 0;
3718   for (proc=0; proc<size; proc++){
3719     len_si[proc] = 0;
3720     if (proc == rank){
3721       len_s[proc] = 0;
3722     } else {
3723       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3724       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3725     }
3726     if (len_s[proc]) {
3727       merge->nsend++;
3728       nrows = 0;
3729       for (i=owners[proc]; i<owners[proc+1]; i++){
3730         if (ai[i+1] > ai[i]) nrows++;
3731       }
3732       len_si[proc] = 2*(nrows+1);
3733       len += len_si[proc];
3734     }
3735   }
3736 
3737   /* determine the number and length of messages to receive for ij-structure */
3738   /*-------------------------------------------------------------------------*/
3739   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3740   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3741 
3742   /* post the Irecv of j-structure */
3743   /*-------------------------------*/
3744   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3745   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3746 
3747   /* post the Isend of j-structure */
3748   /*--------------------------------*/
3749   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3750   sj_waits = si_waits + merge->nsend;
3751 
3752   for (proc=0, k=0; proc<size; proc++){
3753     if (!len_s[proc]) continue;
3754     i = owners[proc];
3755     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3756     k++;
3757   }
3758 
3759   /* receives and sends of j-structure are complete */
3760   /*------------------------------------------------*/
3761   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3762   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3763 
3764   /* send and recv i-structure */
3765   /*---------------------------*/
3766   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3767   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3768 
3769   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3770   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3771   for (proc=0,k=0; proc<size; proc++){
3772     if (!len_s[proc]) continue;
3773     /* form outgoing message for i-structure:
3774          buf_si[0]:                 nrows to be sent
3775                [1:nrows]:           row index (global)
3776                [nrows+1:2*nrows+1]: i-structure index
3777     */
3778     /*-------------------------------------------*/
3779     nrows = len_si[proc]/2 - 1;
3780     buf_si_i    = buf_si + nrows+1;
3781     buf_si[0]   = nrows;
3782     buf_si_i[0] = 0;
3783     nrows = 0;
3784     for (i=owners[proc]; i<owners[proc+1]; i++){
3785       anzi = ai[i+1] - ai[i];
3786       if (anzi) {
3787         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3788         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3789         nrows++;
3790       }
3791     }
3792     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3793     k++;
3794     buf_si += len_si[proc];
3795   }
3796 
3797   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3798   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3799 
3800   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3801   for (i=0; i<merge->nrecv; i++){
3802     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3803   }
3804 
3805   ierr = PetscFree(len_si);CHKERRQ(ierr);
3806   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3807   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3808   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3809   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3810   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3811   ierr = PetscFree(status);CHKERRQ(ierr);
3812 
3813   /* compute a local seq matrix in each processor */
3814   /*----------------------------------------------*/
3815   /* allocate bi array and free space for accumulating nonzero column info */
3816   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3817   bi[0] = 0;
3818 
3819   /* create and initialize a linked list */
3820   nlnk = N+1;
3821   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3822 
3823   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3824   len = 0;
3825   len  = ai[owners[rank+1]] - ai[owners[rank]];
3826   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3827   current_space = free_space;
3828 
3829   /* determine symbolic info for each local row */
3830   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3831   nextrow = buf_ri_k + merge->nrecv;
3832   nextai  = nextrow + merge->nrecv;
3833   for (k=0; k<merge->nrecv; k++){
3834     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3835     nrows = *buf_ri_k[k];
3836     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3837     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3838   }
3839 
3840   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3841   len = 0;
3842   for (i=0;i<m;i++) {
3843     bnzi   = 0;
3844     /* add local non-zero cols of this proc's seqmat into lnk */
3845     arow   = owners[rank] + i;
3846     anzi   = ai[arow+1] - ai[arow];
3847     aj     = a->j + ai[arow];
3848     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3849     bnzi += nlnk;
3850     /* add received col data into lnk */
3851     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3852       if (i == *nextrow[k]) { /* i-th row */
3853         anzi = *(nextai[k]+1) - *nextai[k];
3854         aj   = buf_rj[k] + *nextai[k];
3855         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3856         bnzi += nlnk;
3857         nextrow[k]++; nextai[k]++;
3858       }
3859     }
3860     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3861 
3862     /* if free space is not available, make more free space */
3863     if (current_space->local_remaining<bnzi) {
3864       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3865       nspacedouble++;
3866     }
3867     /* copy data into free space, then initialize lnk */
3868     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3869     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3870 
3871     current_space->array           += bnzi;
3872     current_space->local_used      += bnzi;
3873     current_space->local_remaining -= bnzi;
3874 
3875     bi[i+1] = bi[i] + bnzi;
3876   }
3877 
3878   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3879 
3880   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3881   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3882   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3883 
3884   /* create symbolic parallel matrix B_mpi */
3885   /*---------------------------------------*/
3886   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3887   if (n==PETSC_DECIDE) {
3888     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3889   } else {
3890     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3891   }
3892   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3893   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3894   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3895 
3896   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3897   B_mpi->assembled     = PETSC_FALSE;
3898   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3899   merge->bi            = bi;
3900   merge->bj            = bj;
3901   merge->buf_ri        = buf_ri;
3902   merge->buf_rj        = buf_rj;
3903   merge->coi           = PETSC_NULL;
3904   merge->coj           = PETSC_NULL;
3905   merge->owners_co     = PETSC_NULL;
3906 
3907   /* attach the supporting struct to B_mpi for reuse */
3908   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3909   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3910   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3911   *mpimat = B_mpi;
3912 
3913   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3914   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3915   PetscFunctionReturn(0);
3916 }
3917 
3918 static PetscEvent logkey_seqstompi = 0;
3919 #undef __FUNCT__
3920 #define __FUNCT__ "MatMerge_SeqsToMPI"
3921 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3922 {
3923   PetscErrorCode   ierr;
3924 
3925   PetscFunctionBegin;
3926   if (!logkey_seqstompi) {
3927     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3928   }
3929   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3930   if (scall == MAT_INITIAL_MATRIX){
3931     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3932   }
3933   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3934   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3935   PetscFunctionReturn(0);
3936 }
3937 static PetscEvent logkey_getlocalmat = 0;
3938 #undef __FUNCT__
3939 #define __FUNCT__ "MatGetLocalMat"
3940 /*@C
3941      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3942 
3943     Not Collective
3944 
3945    Input Parameters:
3946 +    A - the matrix
3947 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3948 
3949    Output Parameter:
3950 .    A_loc - the local sequential matrix generated
3951 
3952     Level: developer
3953 
3954 @*/
3955 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3956 {
3957   PetscErrorCode  ierr;
3958   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3959   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3960   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3961   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3962   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3963   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3964 
3965   PetscFunctionBegin;
3966   if (!logkey_getlocalmat) {
3967     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3968   }
3969   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3970   if (scall == MAT_INITIAL_MATRIX){
3971     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3972     ci[0] = 0;
3973     for (i=0; i<am; i++){
3974       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3975     }
3976     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3977     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3978     k = 0;
3979     for (i=0; i<am; i++) {
3980       ncols_o = bi[i+1] - bi[i];
3981       ncols_d = ai[i+1] - ai[i];
3982       /* off-diagonal portion of A */
3983       for (jo=0; jo<ncols_o; jo++) {
3984         col = cmap[*bj];
3985         if (col >= cstart) break;
3986         cj[k]   = col; bj++;
3987         ca[k++] = *ba++;
3988       }
3989       /* diagonal portion of A */
3990       for (j=0; j<ncols_d; j++) {
3991         cj[k]   = cstart + *aj++;
3992         ca[k++] = *aa++;
3993       }
3994       /* off-diagonal portion of A */
3995       for (j=jo; j<ncols_o; j++) {
3996         cj[k]   = cmap[*bj++];
3997         ca[k++] = *ba++;
3998       }
3999     }
4000     /* put together the new matrix */
4001     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4002     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4003     /* Since these are PETSc arrays, change flags to free them as necessary. */
4004     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4005     mat->free_a  = PETSC_TRUE;
4006     mat->free_ij = PETSC_TRUE;
4007     mat->nonew   = 0;
4008   } else if (scall == MAT_REUSE_MATRIX){
4009     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4010     ci = mat->i; cj = mat->j; ca = mat->a;
4011     for (i=0; i<am; i++) {
4012       /* off-diagonal portion of A */
4013       ncols_o = bi[i+1] - bi[i];
4014       for (jo=0; jo<ncols_o; jo++) {
4015         col = cmap[*bj];
4016         if (col >= cstart) break;
4017         *ca++ = *ba++; bj++;
4018       }
4019       /* diagonal portion of A */
4020       ncols_d = ai[i+1] - ai[i];
4021       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4022       /* off-diagonal portion of A */
4023       for (j=jo; j<ncols_o; j++) {
4024         *ca++ = *ba++; bj++;
4025       }
4026     }
4027   } else {
4028     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4029   }
4030 
4031   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4032   PetscFunctionReturn(0);
4033 }
4034 
4035 static PetscEvent logkey_getlocalmatcondensed = 0;
4036 #undef __FUNCT__
4037 #define __FUNCT__ "MatGetLocalMatCondensed"
4038 /*@C
4039      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4040 
4041     Not Collective
4042 
4043    Input Parameters:
4044 +    A - the matrix
4045 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4046 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4047 
4048    Output Parameter:
4049 .    A_loc - the local sequential matrix generated
4050 
4051     Level: developer
4052 
4053 @*/
4054 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4055 {
4056   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4057   PetscErrorCode    ierr;
4058   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4059   IS                isrowa,iscola;
4060   Mat               *aloc;
4061 
4062   PetscFunctionBegin;
4063   if (!logkey_getlocalmatcondensed) {
4064     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
4065   }
4066   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4067   if (!row){
4068     start = A->rmap.rstart; end = A->rmap.rend;
4069     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4070   } else {
4071     isrowa = *row;
4072   }
4073   if (!col){
4074     start = A->cmap.rstart;
4075     cmap  = a->garray;
4076     nzA   = a->A->cmap.n;
4077     nzB   = a->B->cmap.n;
4078     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4079     ncols = 0;
4080     for (i=0; i<nzB; i++) {
4081       if (cmap[i] < start) idx[ncols++] = cmap[i];
4082       else break;
4083     }
4084     imark = i;
4085     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4086     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4087     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4088     ierr = PetscFree(idx);CHKERRQ(ierr);
4089   } else {
4090     iscola = *col;
4091   }
4092   if (scall != MAT_INITIAL_MATRIX){
4093     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4094     aloc[0] = *A_loc;
4095   }
4096   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4097   *A_loc = aloc[0];
4098   ierr = PetscFree(aloc);CHKERRQ(ierr);
4099   if (!row){
4100     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4101   }
4102   if (!col){
4103     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4104   }
4105   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4106   PetscFunctionReturn(0);
4107 }
4108 
4109 static PetscEvent logkey_GetBrowsOfAcols = 0;
4110 #undef __FUNCT__
4111 #define __FUNCT__ "MatGetBrowsOfAcols"
4112 /*@C
4113     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4114 
4115     Collective on Mat
4116 
4117    Input Parameters:
4118 +    A,B - the matrices in mpiaij format
4119 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4120 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4121 
4122    Output Parameter:
4123 +    rowb, colb - index sets of rows and columns of B to extract
4124 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
4125 -    B_seq - the sequential matrix generated
4126 
4127     Level: developer
4128 
4129 @*/
4130 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4131 {
4132   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4133   PetscErrorCode    ierr;
4134   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4135   IS                isrowb,iscolb;
4136   Mat               *bseq;
4137 
4138   PetscFunctionBegin;
4139   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4140     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4141   }
4142   if (!logkey_GetBrowsOfAcols) {
4143     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
4144   }
4145   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4146 
4147   if (scall == MAT_INITIAL_MATRIX){
4148     start = A->cmap.rstart;
4149     cmap  = a->garray;
4150     nzA   = a->A->cmap.n;
4151     nzB   = a->B->cmap.n;
4152     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4153     ncols = 0;
4154     for (i=0; i<nzB; i++) {  /* row < local row index */
4155       if (cmap[i] < start) idx[ncols++] = cmap[i];
4156       else break;
4157     }
4158     imark = i;
4159     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4160     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4161     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4162     ierr = PetscFree(idx);CHKERRQ(ierr);
4163     *brstart = imark;
4164     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4165   } else {
4166     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4167     isrowb = *rowb; iscolb = *colb;
4168     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4169     bseq[0] = *B_seq;
4170   }
4171   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4172   *B_seq = bseq[0];
4173   ierr = PetscFree(bseq);CHKERRQ(ierr);
4174   if (!rowb){
4175     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4176   } else {
4177     *rowb = isrowb;
4178   }
4179   if (!colb){
4180     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4181   } else {
4182     *colb = iscolb;
4183   }
4184   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4185   PetscFunctionReturn(0);
4186 }
4187 
4188 static PetscEvent logkey_GetBrowsOfAocols = 0;
4189 #undef __FUNCT__
4190 #define __FUNCT__ "MatGetBrowsOfAoCols"
4191 /*@C
4192     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4193     of the OFF-DIAGONAL portion of local A
4194 
4195     Collective on Mat
4196 
4197    Input Parameters:
4198 +    A,B - the matrices in mpiaij format
4199 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4200 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4201 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4202 
4203    Output Parameter:
4204 .    B_oth - the sequential matrix generated
4205 
4206     Level: developer
4207 
4208 @*/
4209 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4210 {
4211   VecScatter_MPI_General *gen_to,*gen_from;
4212   PetscErrorCode         ierr;
4213   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4214   Mat_SeqAIJ             *b_oth;
4215   VecScatter             ctx=a->Mvctx;
4216   MPI_Comm               comm=ctx->comm;
4217   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4218   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4219   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4220   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4221   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4222   MPI_Status             *sstatus,rstatus;
4223   PetscMPIInt            jj;
4224   PetscInt               *cols,sbs,rbs;
4225   PetscScalar            *vals;
4226 
4227   PetscFunctionBegin;
4228   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4229     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4230   }
4231   if (!logkey_GetBrowsOfAocols) {
4232     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
4233   }
4234   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4235   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4236 
4237   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4238   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4239   rvalues  = gen_from->values; /* holds the length of receiving row */
4240   svalues  = gen_to->values;   /* holds the length of sending row */
4241   nrecvs   = gen_from->n;
4242   nsends   = gen_to->n;
4243 
4244   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4245   srow     = gen_to->indices;   /* local row index to be sent */
4246   sstarts  = gen_to->starts;
4247   sprocs   = gen_to->procs;
4248   sstatus  = gen_to->sstatus;
4249   sbs      = gen_to->bs;
4250   rstarts  = gen_from->starts;
4251   rprocs   = gen_from->procs;
4252   rbs      = gen_from->bs;
4253 
4254   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4255   if (scall == MAT_INITIAL_MATRIX){
4256     /* i-array */
4257     /*---------*/
4258     /*  post receives */
4259     for (i=0; i<nrecvs; i++){
4260       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4261       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4262       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4263     }
4264 
4265     /* pack the outgoing message */
4266     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4267     rstartsj = sstartsj + nsends +1;
4268     sstartsj[0] = 0;  rstartsj[0] = 0;
4269     len = 0; /* total length of j or a array to be sent */
4270     k = 0;
4271     for (i=0; i<nsends; i++){
4272       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4273       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4274       for (j=0; j<nrows; j++) {
4275         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4276         for (l=0; l<sbs; l++){
4277           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4278           rowlen[j*sbs+l] = ncols;
4279           len += ncols;
4280           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4281         }
4282         k++;
4283       }
4284       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4285       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4286     }
4287     /* recvs and sends of i-array are completed */
4288     i = nrecvs;
4289     while (i--) {
4290       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4291     }
4292     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4293 
4294     /* allocate buffers for sending j and a arrays */
4295     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4296     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4297 
4298     /* create i-array of B_oth */
4299     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4300     b_othi[0] = 0;
4301     len = 0; /* total length of j or a array to be received */
4302     k = 0;
4303     for (i=0; i<nrecvs; i++){
4304       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4305       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4306       for (j=0; j<nrows; j++) {
4307         b_othi[k+1] = b_othi[k] + rowlen[j];
4308         len += rowlen[j]; k++;
4309       }
4310       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4311     }
4312 
4313     /* allocate space for j and a arrrays of B_oth */
4314     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4315     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4316 
4317     /* j-array */
4318     /*---------*/
4319     /*  post receives of j-array */
4320     for (i=0; i<nrecvs; i++){
4321       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4322       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4323     }
4324 
4325     /* pack the outgoing message j-array */
4326     k = 0;
4327     for (i=0; i<nsends; i++){
4328       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4329       bufJ = bufj+sstartsj[i];
4330       for (j=0; j<nrows; j++) {
4331         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4332         for (ll=0; ll<sbs; ll++){
4333           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4334           for (l=0; l<ncols; l++){
4335             *bufJ++ = cols[l];
4336           }
4337           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4338         }
4339       }
4340       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4341     }
4342 
4343     /* recvs and sends of j-array are completed */
4344     i = nrecvs;
4345     while (i--) {
4346       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4347     }
4348     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4349   } else if (scall == MAT_REUSE_MATRIX){
4350     sstartsj = *startsj;
4351     rstartsj = sstartsj + nsends +1;
4352     bufa     = *bufa_ptr;
4353     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4354     b_otha   = b_oth->a;
4355   } else {
4356     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4357   }
4358 
4359   /* a-array */
4360   /*---------*/
4361   /*  post receives of a-array */
4362   for (i=0; i<nrecvs; i++){
4363     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4364     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4365   }
4366 
4367   /* pack the outgoing message a-array */
4368   k = 0;
4369   for (i=0; i<nsends; i++){
4370     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4371     bufA = bufa+sstartsj[i];
4372     for (j=0; j<nrows; j++) {
4373       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4374       for (ll=0; ll<sbs; ll++){
4375         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4376         for (l=0; l<ncols; l++){
4377           *bufA++ = vals[l];
4378         }
4379         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4380       }
4381     }
4382     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4383   }
4384   /* recvs and sends of a-array are completed */
4385   i = nrecvs;
4386   while (i--) {
4387     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4388   }
4389   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4390   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4391 
4392   if (scall == MAT_INITIAL_MATRIX){
4393     /* put together the new matrix */
4394     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4395 
4396     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4397     /* Since these are PETSc arrays, change flags to free them as necessary. */
4398     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4399     b_oth->free_a  = PETSC_TRUE;
4400     b_oth->free_ij = PETSC_TRUE;
4401     b_oth->nonew   = 0;
4402 
4403     ierr = PetscFree(bufj);CHKERRQ(ierr);
4404     if (!startsj || !bufa_ptr){
4405       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4406       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4407     } else {
4408       *startsj  = sstartsj;
4409       *bufa_ptr = bufa;
4410     }
4411   }
4412   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4413   PetscFunctionReturn(0);
4414 }
4415 
4416 #undef __FUNCT__
4417 #define __FUNCT__ "MatGetCommunicationStructs"
4418 /*@C
4419   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4420 
4421   Not Collective
4422 
4423   Input Parameters:
4424 . A - The matrix in mpiaij format
4425 
4426   Output Parameter:
4427 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4428 . colmap - A map from global column index to local index into lvec
4429 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4430 
4431   Level: developer
4432 
4433 @*/
4434 #if defined (PETSC_USE_CTABLE)
4435 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4436 #else
4437 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4438 #endif
4439 {
4440   Mat_MPIAIJ *a;
4441 
4442   PetscFunctionBegin;
4443   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4444   PetscValidPointer(lvec, 2)
4445   PetscValidPointer(colmap, 3)
4446   PetscValidPointer(multScatter, 4)
4447   a = (Mat_MPIAIJ *) A->data;
4448   if (lvec) *lvec = a->lvec;
4449   if (colmap) *colmap = a->colmap;
4450   if (multScatter) *multScatter = a->Mvctx;
4451   PetscFunctionReturn(0);
4452 }
4453 
4454 EXTERN_C_BEGIN
4455 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4456 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4457 EXTERN_C_END
4458 
4459 /*MC
4460    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4461 
4462    Options Database Keys:
4463 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4464 
4465   Level: beginner
4466 
4467 .seealso: MatCreateMPIAIJ
4468 M*/
4469 
4470 EXTERN_C_BEGIN
4471 #undef __FUNCT__
4472 #define __FUNCT__ "MatCreate_MPIAIJ"
4473 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4474 {
4475   Mat_MPIAIJ     *b;
4476   PetscErrorCode ierr;
4477   PetscMPIInt    size;
4478 
4479   PetscFunctionBegin;
4480   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4481 
4482   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
4483   B->data         = (void*)b;
4484   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4485   B->factor       = 0;
4486   B->rmap.bs      = 1;
4487   B->assembled    = PETSC_FALSE;
4488   B->mapping      = 0;
4489 
4490   B->insertmode      = NOT_SET_VALUES;
4491   b->size            = size;
4492   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4493 
4494   /* build cache for off array entries formed */
4495   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4496   b->donotstash  = PETSC_FALSE;
4497   b->colmap      = 0;
4498   b->garray      = 0;
4499   b->roworiented = PETSC_TRUE;
4500 
4501   /* stuff used for matrix vector multiply */
4502   b->lvec      = PETSC_NULL;
4503   b->Mvctx     = PETSC_NULL;
4504 
4505   /* stuff for MatGetRow() */
4506   b->rowindices   = 0;
4507   b->rowvalues    = 0;
4508   b->getrowactive = PETSC_FALSE;
4509 
4510 
4511   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4512                                      "MatStoreValues_MPIAIJ",
4513                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4514   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4515                                      "MatRetrieveValues_MPIAIJ",
4516                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4518 				     "MatGetDiagonalBlock_MPIAIJ",
4519                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4520   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4521 				     "MatIsTranspose_MPIAIJ",
4522 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4523   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4524 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4525 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4526   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4527 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4528 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4529   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4530 				     "MatDiagonalScaleLocal_MPIAIJ",
4531 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4532   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4533                                      "MatConvert_MPIAIJ_MPICSRPERM",
4534                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4535   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4536                                      "MatConvert_MPIAIJ_MPICRL",
4537                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4538   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4539   PetscFunctionReturn(0);
4540 }
4541 EXTERN_C_END
4542 
4543 #undef __FUNCT__
4544 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4545 /*@C
4546      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4547          and "off-diagonal" part of the matrix in CSR format.
4548 
4549    Collective on MPI_Comm
4550 
4551    Input Parameters:
4552 +  comm - MPI communicator
4553 .  m - number of local rows (Cannot be PETSC_DECIDE)
4554 .  n - This value should be the same as the local size used in creating the
4555        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4556        calculated if N is given) For square matrices n is almost always m.
4557 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4558 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4559 .   i - row indices for "diagonal" portion of matrix
4560 .   j - column indices
4561 .   a - matrix values
4562 .   oi - row indices for "off-diagonal" portion of matrix
4563 .   oj - column indices
4564 -   oa - matrix values
4565 
4566    Output Parameter:
4567 .   mat - the matrix
4568 
4569    Level: advanced
4570 
4571    Notes:
4572        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4573 
4574        The i and j indices are 0 based
4575 
4576        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4577 
4578 
4579 .keywords: matrix, aij, compressed row, sparse, parallel
4580 
4581 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4582           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4583 @*/
4584 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4585 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4586 {
4587   PetscErrorCode ierr;
4588   Mat_MPIAIJ     *maij;
4589 
4590  PetscFunctionBegin;
4591   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4592   if (i[0]) {
4593     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4594   }
4595   if (oi[0]) {
4596     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4597   }
4598   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4599   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4600   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4601   maij = (Mat_MPIAIJ*) (*mat)->data;
4602   maij->donotstash     = PETSC_TRUE;
4603   (*mat)->preallocated = PETSC_TRUE;
4604 
4605   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4606   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4607   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4608 
4609   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4610   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4611 
4612   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4613   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4614   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4615   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4616 
4617   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4618   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4619   PetscFunctionReturn(0);
4620 }
4621 
4622 /*
4623     Special version for direct calls from Fortran
4624 */
4625 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4626 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4627 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4628 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4629 #endif
4630 
/* Redefine the error macros to abort (rather than return an error code), since
   they are used below inside a function that returns void */
4632 #undef CHKERRQ
4633 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4634 #undef SETERRQ2
4635 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4636 #undef SETERRQ
4637 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4638 
4639 EXTERN_C_BEGIN
4640 #undef __FUNCT__
4641 #define __FUNCT__ "matsetvaluesmpiaij_"
4642 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4643 {
4644   Mat            mat = *mmat;
4645   PetscInt       m = *mm, n = *mn;
4646   InsertMode     addv = *maddv;
4647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4648   PetscScalar    value;
4649   PetscErrorCode ierr;
4650 
4651   MatPreallocated(mat);
4652   if (mat->insertmode == NOT_SET_VALUES) {
4653     mat->insertmode = addv;
4654   }
4655 #if defined(PETSC_USE_DEBUG)
4656   else if (mat->insertmode != addv) {
4657     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4658   }
4659 #endif
4660   {
4661   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4662   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4663   PetscTruth     roworiented = aij->roworiented;
4664 
4665   /* Some Variables required in the macro */
4666   Mat            A = aij->A;
4667   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4668   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4669   PetscScalar    *aa = a->a;
4670   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4671   Mat            B = aij->B;
4672   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4673   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4674   PetscScalar    *ba = b->a;
4675 
4676   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4677   PetscInt       nonew = a->nonew;
4678   PetscScalar    *ap1,*ap2;
4679 
4680   PetscFunctionBegin;
4681   for (i=0; i<m; i++) {
4682     if (im[i] < 0) continue;
4683 #if defined(PETSC_USE_DEBUG)
4684     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4685 #endif
4686     if (im[i] >= rstart && im[i] < rend) {
4687       row      = im[i] - rstart;
4688       lastcol1 = -1;
4689       rp1      = aj + ai[row];
4690       ap1      = aa + ai[row];
4691       rmax1    = aimax[row];
4692       nrow1    = ailen[row];
4693       low1     = 0;
4694       high1    = nrow1;
4695       lastcol2 = -1;
4696       rp2      = bj + bi[row];
4697       ap2      = ba + bi[row];
4698       rmax2    = bimax[row];
4699       nrow2    = bilen[row];
4700       low2     = 0;
4701       high2    = nrow2;
4702 
4703       for (j=0; j<n; j++) {
4704         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4705         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4706         if (in[j] >= cstart && in[j] < cend){
4707           col = in[j] - cstart;
4708           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4709         } else if (in[j] < 0) continue;
4710 #if defined(PETSC_USE_DEBUG)
4711         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4712 #endif
4713         else {
4714           if (mat->was_assembled) {
4715             if (!aij->colmap) {
4716               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4717             }
4718 #if defined (PETSC_USE_CTABLE)
4719             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4720 	    col--;
4721 #else
4722             col = aij->colmap[in[j]] - 1;
4723 #endif
4724             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4725               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4726               col =  in[j];
4727               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4728               B = aij->B;
4729               b = (Mat_SeqAIJ*)B->data;
4730               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4731               rp2      = bj + bi[row];
4732               ap2      = ba + bi[row];
4733               rmax2    = bimax[row];
4734               nrow2    = bilen[row];
4735               low2     = 0;
4736               high2    = nrow2;
4737               bm       = aij->B->rmap.n;
4738               ba = b->a;
4739             }
4740           } else col = in[j];
4741           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4742         }
4743       }
4744     } else {
4745       if (!aij->donotstash) {
4746         if (roworiented) {
4747           if (ignorezeroentries && v[i*n] == 0.0) continue;
4748           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4749         } else {
4750           if (ignorezeroentries && v[i] == 0.0) continue;
4751           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4752         }
4753       }
4754     }
4755   }}
4756   PetscFunctionReturnVoid();
4757 }
4758 EXTERN_C_END
4759 
4760