xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fbd52ca3e6e2e8d390f41e1ad6363b9b3ca42def)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it, it is not scalable (each process
11 stores an order-N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
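
/*
   Illustrative sketch (not part of this file): how the colmap built above is
   typically consulted.  Both the PETSC_USE_CTABLE and plain-array variants store
   local index + 1, so a result of 0 (hence -1 after the decrement) means the
   global column is not present in the off-diagonal block.

     PetscInt gcol = ...;                      global column owned by another process
     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) ... the column has no entry in the off-diagonal block yet ...
*/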
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81       for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
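
/*
   Rough sketch (an assumed simplification, not literal code) of what the two macros
   above do for a single (row,col,value) triple; rp points at the sorted column
   indices of the row and ap at the matching values:

     low = 0; high = nrow;
     while (high-low > 5) {                     binary search on the sorted row
       t = (low+high)/2;
       if (rp[t] > col) high = t; else low = t;
     }
     for (i=low; i<high; i++) {
       if (rp[i] == col) { ap[i] = (addv == ADD_VALUES) ? ap[i]+value : value; done; }
       if (rp[i] >  col) break;
     }
     otherwise insert col/value at position i, shifting rp[i..] and ap[i..] up by one
     (reallocating the row with MatSeqXAIJReallocateAIJ() if it is full)
*/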
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144 /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
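
/*
   Hedged usage sketch (not part of this file): values are always passed with global
   indices; entries whose row is owned by another process are stashed above and only
   communicated during assembly.

     PetscInt    row = ..., col = ...;          global indices
     PetscScalar v   = ...;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/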
237 
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatGetValues_MPIAIJ"
241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
242 {
243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
244   PetscErrorCode ierr;
245   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
246   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
247 
248   PetscFunctionBegin;
249   for (i=0; i<m; i++) {
250     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
251     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
252     if (idxm[i] >= rstart && idxm[i] < rend) {
253       row = idxm[i] - rstart;
254       for (j=0; j<n; j++) {
255         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
256         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
257         if (idxn[j] >= cstart && idxn[j] < cend){
258           col = idxn[j] - cstart;
259           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
260         } else {
261           if (!aij->colmap) {
262             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
263           }
264 #if defined (PETSC_USE_CTABLE)
265           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
266           col --;
267 #else
268           col = aij->colmap[idxn[j]] - 1;
269 #endif
270           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
271           else {
272             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
273           }
274         }
275       }
276     } else {
277       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
278     }
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   if (aij->donotstash) {
294     PetscFunctionReturn(0);
295   }
296 
297   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
298   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
299   if (addv == (ADD_VALUES|INSERT_VALUES)) {
300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
301   }
302   mat->insertmode = addv; /* in case this processor had no cache */
303 
304   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
305   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
306   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
313 {
314   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
315   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
316   PetscErrorCode ierr;
317   PetscMPIInt    n;
318   PetscInt       i,j,rstart,ncols,flg;
319   PetscInt       *row,*col,other_disassembled;
320   PetscScalar    *val;
321   InsertMode     addv = mat->insertmode;
322 
323   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324   PetscFunctionBegin;
325   if (!aij->donotstash) {
326     while (1) {
327       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
328       if (!flg) break;
329 
330       for (i=0; i<n;) {
331         /* Now identify the consecutive vals belonging to the same row */
332         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
333         if (j < n) ncols = j-i;
334         else       ncols = n-i;
335         /* Now assemble all these values with a single function call */
336         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
337         i = j;
338       }
339     }
340     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
341   }
342   a->compressedrow.use     = PETSC_FALSE;
343   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
345 
346   /* determine if any processor has disassembled; if so we must
347      also disassemble ourselves so that we may reassemble. */
348   /*
349      if the nonzero structure of submatrix B cannot change then we know that
350      no processor disassembled and we can skip this step
351   */
352   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
353     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
354     if (mat->was_assembled && !other_disassembled) {
355       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
356     }
357   }
358   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
359     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
360   }
361   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
362   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
363   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
364   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
365 
366   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
367   aij->rowvalues = 0;
368 
369   /* used by MatAXPY() */
370   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
371   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
372 
373   PetscFunctionReturn(0);
374 }
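
/*
   Hedged usage note (not from this file): MatAssemblyBegin()/MatAssemblyEnd() are
   collective, so every process must call them even if it set no off-process values.
   When switching between ADD_VALUES and INSERT_VALUES a flush assembly is required
   first; the off-diagonal scatter is only set up at the first MAT_FINAL_ASSEMBLY
   (see MatSetUpMultiply_MPIAIJ() above).

     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/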
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
379 {
380   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
381   PetscErrorCode ierr;
382 
383   PetscFunctionBegin;
384   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
385   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatZeroRows_MPIAIJ"
391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
392 {
393   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
394   PetscErrorCode ierr;
395   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
396   PetscInt       i,*owners = A->rmap.range;
397   PetscInt       *nprocs,j,idx,nsends,row;
398   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
399   PetscInt       *rvalues,count,base,slen,*source;
400   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
401   MPI_Comm       comm = A->comm;
402   MPI_Request    *send_waits,*recv_waits;
403   MPI_Status     recv_status,*send_status;
404 #if defined(PETSC_DEBUG)
405   PetscTruth     found = PETSC_FALSE;
406 #endif
407 
408   PetscFunctionBegin;
409   /*  first count number of contributors to each processor */
410   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
411   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
412   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
413   j = 0;
414   for (i=0; i<N; i++) {
415     if (lastidx > (idx = rows[i])) j = 0;
416     lastidx = idx;
417     for (; j<size; j++) {
418       if (idx >= owners[j] && idx < owners[j+1]) {
419         nprocs[2*j]++;
420         nprocs[2*j+1] = 1;
421         owner[i] = j;
422 #if defined(PETSC_DEBUG)
423         found = PETSC_TRUE;
424 #endif
425         break;
426       }
427     }
428 #if defined(PETSC_DEBUG)
429     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
430     found = PETSC_FALSE;
431 #endif
432   }
433   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434 
435   /* inform other processors of number of messages and max length*/
436   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
437 
438   /* post receives:   */
439   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
440   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
441   for (i=0; i<nrecvs; i++) {
442     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
443   }
444 
445   /* do sends:
446       1) starts[i] gives the starting index in svalues for stuff going to
447          the ith processor
448   */
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
450   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
451   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
452   starts[0] = 0;
453   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
454   for (i=0; i<N; i++) {
455     svalues[starts[owner[i]]++] = rows[i];
456   }
457 
458   starts[0] = 0;
459   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
460   count = 0;
461   for (i=0; i<size; i++) {
462     if (nprocs[2*i+1]) {
463       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
464     }
465   }
466   ierr = PetscFree(starts);CHKERRQ(ierr);
467 
468   base = owners[rank];
469 
470   /*  wait on receives */
471   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
472   source = lens + nrecvs;
473   count  = nrecvs; slen = 0;
474   while (count) {
475     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
476     /* unpack receives into our local space */
477     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
478     source[imdex]  = recv_status.MPI_SOURCE;
479     lens[imdex]    = n;
480     slen          += n;
481     count--;
482   }
483   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
484 
485   /* move the data into the send scatter */
486   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
487   count = 0;
488   for (i=0; i<nrecvs; i++) {
489     values = rvalues + i*nmax;
490     for (j=0; j<lens[i]; j++) {
491       lrows[count++] = values[j] - base;
492     }
493   }
494   ierr = PetscFree(rvalues);CHKERRQ(ierr);
495   ierr = PetscFree(lens);CHKERRQ(ierr);
496   ierr = PetscFree(owner);CHKERRQ(ierr);
497   ierr = PetscFree(nprocs);CHKERRQ(ierr);
498 
499   /* actually zap the local rows */
500   /*
501         Zero the required rows. If the "diagonal block" of the matrix
502      is square and the user wishes to set the diagonal we use separate
503      code so that MatSetValues() is not called for each diagonal entry, which
504      would allocate new memory, triggering many mallocs and slowing things down.
505 
506        Contributed by: Matthew Knepley
507   */
508   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
509   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
510   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
511     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
512   } else if (diag != 0.0) {
513     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
514     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
515       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
517     }
518     for (i = 0; i < slen; i++) {
519       row  = lrows[i] + rstart;
520       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
521     }
522     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
524   } else {
525     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
526   }
527   ierr = PetscFree(lrows);CHKERRQ(ierr);
528 
529   /* wait on sends */
530   if (nsends) {
531     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
532     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
533     ierr = PetscFree(send_status);CHKERRQ(ierr);
534   }
535   ierr = PetscFree(send_waits);CHKERRQ(ierr);
536   ierr = PetscFree(svalues);CHKERRQ(ierr);
537 
538   PetscFunctionReturn(0);
539 }
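
/*
   Hedged usage sketch (not part of this file): rows are given as global indices and
   may include rows owned by other processes; they are routed to their owners above
   before the local rows are zeroed.

     PetscInt rows[2] = {0,5};                          global row numbers
     ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);    zero the rows, put 1.0 on the diagonal
*/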
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMult_MPIAIJ"
543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
544 {
545   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
546   PetscErrorCode ierr;
547   PetscInt       nt;
548 
549   PetscFunctionBegin;
550   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
551   if (nt != A->cmap.n) {
552     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
553   }
554   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
555   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
556   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
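
/*
   Sketch of the structure exploited above (assumed notation, not from this file):
   with x partitioned like the columns and y like the rows,

     y_local = A_diag x_local + B_offdiag x_ghost

   where x_ghost (a->lvec) is gathered by the VecScatter while the diagonal-block
   multiply proceeds, overlapping communication with computation.  A caller sees only

     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);          x: column layout, y: row layout
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/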
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultAdd_MPIAIJ"
563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567 
568   PetscFunctionBegin;
569   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
570   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
571   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
572   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
579 {
580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
581   PetscErrorCode ierr;
582   PetscTruth     merged;
583 
584   PetscFunctionBegin;
585   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
586   /* do nondiagonal part */
587   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
588   if (!merged) {
589     /* send it on its way */
590     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
591     /* do local part */
592     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
593     /* receive remote parts: note this assumes the values are not actually */
594     /* added into yy until the VecScatterEnd() on the next line */
595     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
596   } else {
597     /* do local part */
598     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
599     /* send it on its way */
600     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
601     /* the values were actually received in the Begin(), but we still must call this (a no-op here) */
602     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
611 {
612   MPI_Comm       comm;
613   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
614   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
615   IS             Me,Notme;
616   PetscErrorCode ierr;
617   PetscInt       M,N,first,last,*notme,i;
618   PetscMPIInt    size;
619 
620   PetscFunctionBegin;
621 
622   /* Easy test: symmetric diagonal block */
623   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
624   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
625   if (!*f) PetscFunctionReturn(0);
626   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
628   if (size == 1) PetscFunctionReturn(0);
629 
630   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
631   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
632   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
633   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
634   for (i=0; i<first; i++) notme[i] = i;
635   for (i=last; i<M; i++) notme[i-last+first] = i;
636   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
637   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
638   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
639   Aoff = Aoffs[0];
640   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
641   Boff = Boffs[0];
642   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
644   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
645   ierr = ISDestroy(Me);CHKERRQ(ierr);
646   ierr = ISDestroy(Notme);CHKERRQ(ierr);
647 
648   PetscFunctionReturn(0);
649 }
650 EXTERN_C_END
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
655 {
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657   PetscErrorCode ierr;
658 
659   PetscFunctionBegin;
660   /* do nondiagonal part */
661   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
662   /* send it on its way */
663   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
664   /* do local part */
665   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
666   /* receive remote parts */
667   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672   This only works correctly for square matrices where the subblock A->A is the
673    diagonal block
674 */
675 #undef __FUNCT__
676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
678 {
679   PetscErrorCode ierr;
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681 
682   PetscFunctionBegin;
683   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
684   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
685     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
686   }
687   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatScale_MPIAIJ"
693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
694 {
695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
700   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatDestroy_MPIAIJ"
706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
707 {
708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
709   PetscErrorCode ierr;
710 
711   PetscFunctionBegin;
712 #if defined(PETSC_USE_LOG)
713   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
714 #endif
715   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
717   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
718 #if defined (PETSC_USE_CTABLE)
719   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
720 #else
721   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
722 #endif
723   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
724   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
725   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
726   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
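
/*
   Hedged usage sketch (not from this file; "matrix.dat" is a hypothetical file name):
   the binary format written above is the one read back by MatLoad().

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/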
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869 {
870   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,iascii,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (iascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo    info;
885       PetscTruth inodes;
886 
887       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
888       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
890       if (!inodes) {
891         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
893       } else {
894         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
896       }
897       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
899       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
901       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
902       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
903       PetscFunctionReturn(0);
904     } else if (format == PETSC_VIEWER_ASCII_INFO) {
905       PetscInt   inodecount,inodelimit,*inodes;
906       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
907       if (inodes) {
908         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
909       } else {
910         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
911       }
912       PetscFunctionReturn(0);
913     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
914       PetscFunctionReturn(0);
915     }
916   } else if (isbinary) {
917     if (size == 1) {
918       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
919       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
920     } else {
921       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
922     }
923     PetscFunctionReturn(0);
924   } else if (isdraw) {
925     PetscDraw  draw;
926     PetscTruth isnull;
927     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
928     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
929   }
930 
931   if (size == 1) {
932     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
933     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
934   } else {
935     /* assemble the entire matrix onto first processor. */
936     Mat         A;
937     Mat_SeqAIJ  *Aloc;
938     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
939     PetscScalar *a;
940 
941     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
942     if (!rank) {
943       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
944     } else {
945       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
946     }
947     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
948     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
949     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
950     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
951 
952     /* copy over the A part */
953     Aloc = (Mat_SeqAIJ*)aij->A->data;
954     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
955     row = mat->rmap.rstart;
956     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
957     for (i=0; i<m; i++) {
958       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
959       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
960     }
961     aj = Aloc->j;
962     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
963 
964     /* copy over the B part */
965     Aloc = (Mat_SeqAIJ*)aij->B->data;
966     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
967     row  = mat->rmap.rstart;
968     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
969     ct   = cols;
970     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
971     for (i=0; i<m; i++) {
972       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
973       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
974     }
975     ierr = PetscFree(ct);CHKERRQ(ierr);
976     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     /*
979        Everyone has to participate in drawing the matrix since the graphics waits are
980        synchronized across all processors that share the PetscDraw object
981     */
982     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
983     if (!rank) {
984       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
985       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
986     }
987     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
988     ierr = MatDestroy(A);CHKERRQ(ierr);
989   }
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatView_MPIAIJ"
995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
996 {
997   PetscErrorCode ierr;
998   PetscTruth     iascii,isdraw,issocket,isbinary;
999 
1000   PetscFunctionBegin;
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1005   if (iascii || isdraw || isbinary || issocket) {
1006     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1007   } else {
1008     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1009   }
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatRelax_MPIAIJ"
1017 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1018 {
1019   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1020   PetscErrorCode ierr;
1021   Vec            bb1;
1022 
1023   PetscFunctionBegin;
1024   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1025 
1026   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1027 
1028   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1029     if (flag & SOR_ZERO_INITIAL_GUESS) {
1030       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1031       its--;
1032     }
1033 
1034     while (its--) {
1035       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1036       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1037 
1038       /* update rhs: bb1 = bb - B*x */
1039       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1040       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1041 
1042       /* local sweep */
1043       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1044       CHKERRQ(ierr);
1045     }
1046   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1047     if (flag & SOR_ZERO_INITIAL_GUESS) {
1048       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1049       its--;
1050     }
1051     while (its--) {
1052       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1054 
1055       /* update rhs: bb1 = bb - B*x */
1056       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1057       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1058 
1059       /* local sweep */
1060       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1061       CHKERRQ(ierr);
1062     }
1063   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1064     if (flag & SOR_ZERO_INITIAL_GUESS) {
1065       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1066       its--;
1067     }
1068     while (its--) {
1069       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1071 
1072       /* update rhs: bb1 = bb - B*x */
1073       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1074       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1075 
1076       /* local sweep */
1077       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1078       CHKERRQ(ierr);
1079     }
1080   } else {
1081     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1082   }
1083 
1084   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
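
/*
   Hedged usage note (not from this file): these local sweeps are normally reached
   through the SOR preconditioner, e.g.

     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   Only the SOR_LOCAL_* variants are handled above; a true parallel sweep errors out.
*/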
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatPermute_MPIAIJ"
1090 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1091 {
1092   MPI_Comm       comm,pcomm;
1093   PetscInt       first,local_size,nrows,*rows;
1094   int            ntids;
1095   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1100   /* make a collective version of 'rowp' */
1101   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1102   if (pcomm==comm) {
1103     crowp = rowp;
1104   } else {
1105     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1106     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1107     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1108     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1109   }
1110   /* collect the global row permutation and invert it */
1111   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1112   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1113   if (pcomm!=comm) {
1114     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1115   }
1116   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1117   /* get the local target indices */
1118   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1120   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1121   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1122   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1123   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1124   /* the column permutation is so much easier;
1125      make a local version of 'colp' and invert it */
1126   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1127   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1128   if (ntids==1) {
1129     lcolp = colp;
1130   } else {
1131     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1132     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1133     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1134   }
1135   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1136   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1137   if (ntids>1) {
1138     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1139     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1140   }
1141   /* now we just get the submatrix */
1142   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1143   /* clean up */
1144   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1145   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1151 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1152 {
1153   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1154   Mat            A = mat->A,B = mat->B;
1155   PetscErrorCode ierr;
1156   PetscReal      isend[5],irecv[5];
1157 
1158   PetscFunctionBegin;
1159   info->block_size     = 1.0;
1160   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1161   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1162   isend[3] = info->memory;  isend[4] = info->mallocs;
1163   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1164   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1165   isend[3] += info->memory;  isend[4] += info->mallocs;
1166   if (flag == MAT_LOCAL) {
1167     info->nz_used      = isend[0];
1168     info->nz_allocated = isend[1];
1169     info->nz_unneeded  = isend[2];
1170     info->memory       = isend[3];
1171     info->mallocs      = isend[4];
1172   } else if (flag == MAT_GLOBAL_MAX) {
1173     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1174     info->nz_used      = irecv[0];
1175     info->nz_allocated = irecv[1];
1176     info->nz_unneeded  = irecv[2];
1177     info->memory       = irecv[3];
1178     info->mallocs      = irecv[4];
1179   } else if (flag == MAT_GLOBAL_SUM) {
1180     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1181     info->nz_used      = irecv[0];
1182     info->nz_allocated = irecv[1];
1183     info->nz_unneeded  = irecv[2];
1184     info->memory       = irecv[3];
1185     info->mallocs      = irecv[4];
1186   }
1187   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1188   info->fill_ratio_needed = 0;
1189   info->factor_mallocs    = 0;
1190   info->rows_global       = (double)matin->rmap.N;
1191   info->columns_global    = (double)matin->cmap.N;
1192   info->rows_local        = (double)matin->rmap.n;
1193   info->columns_local     = (double)matin->cmap.N;
1194 
1195   PetscFunctionReturn(0);
1196 }
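
/*
   Hedged usage sketch (not part of this file):

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/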
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatSetOption_MPIAIJ"
1200 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1201 {
1202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   switch (op) {
1207   case MAT_NO_NEW_NONZERO_LOCATIONS:
1208   case MAT_YES_NEW_NONZERO_LOCATIONS:
1209   case MAT_COLUMNS_UNSORTED:
1210   case MAT_COLUMNS_SORTED:
1211   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1212   case MAT_KEEP_ZEROED_ROWS:
1213   case MAT_NEW_NONZERO_LOCATION_ERR:
1214   case MAT_USE_INODES:
1215   case MAT_DO_NOT_USE_INODES:
1216   case MAT_IGNORE_ZERO_ENTRIES:
1217     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1218     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1219     break;
1220   case MAT_ROW_ORIENTED:
1221     a->roworiented = PETSC_TRUE;
1222     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1223     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1224     break;
1225   case MAT_ROWS_SORTED:
1226   case MAT_ROWS_UNSORTED:
1227   case MAT_YES_NEW_DIAGONALS:
1228     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1229     break;
1230   case MAT_COLUMN_ORIENTED:
1231     a->roworiented = PETSC_FALSE;
1232     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1233     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1234     break;
1235   case MAT_IGNORE_OFF_PROC_ENTRIES:
1236     a->donotstash = PETSC_TRUE;
1237     break;
1238   case MAT_NO_NEW_DIAGONALS:
1239     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1240   case MAT_SYMMETRIC:
1241     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1242     break;
1243   case MAT_STRUCTURALLY_SYMMETRIC:
1244   case MAT_HERMITIAN:
1245   case MAT_SYMMETRY_ETERNAL:
1246     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1247     break;
1248   case MAT_NOT_SYMMETRIC:
1249   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1250   case MAT_NOT_HERMITIAN:
1251   case MAT_NOT_SYMMETRY_ETERNAL:
1252     break;
1253   default:
1254     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1255   }
1256   PetscFunctionReturn(0);
1257 }
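
/*
   Hedged usage sketch (not from this file): in this PETSc version MatSetOption()
   takes only the matrix and the option, e.g.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);

   which sets donotstash above, so off-process entries passed to MatSetValues() are
   dropped instead of being communicated at assembly.
*/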
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatGetRow_MPIAIJ"
1261 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1262 {
1263   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1264   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1265   PetscErrorCode ierr;
1266   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1267   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1268   PetscInt       *cmap,*idx_p;
1269 
1270   PetscFunctionBegin;
1271   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1272   mat->getrowactive = PETSC_TRUE;
1273 
1274   if (!mat->rowvalues && (idx || v)) {
1275     /*
1276         allocate enough space to hold information from the longest row.
1277     */
1278     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1279     PetscInt     max = 1,tmp;
1280     for (i=0; i<matin->rmap.n; i++) {
1281       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1282       if (max < tmp) { max = tmp; }
1283     }
1284     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1285     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1286   }
1287 
1288   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1289   lrow = row - rstart;
1290 
1291   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1292   if (!v)   {pvA = 0; pvB = 0;}
1293   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1294   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1295   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1296   nztot = nzA + nzB;
1297 
1298   cmap  = mat->garray;
1299   if (v  || idx) {
1300     if (nztot) {
1301       /* Sort by increasing column numbers, assuming A and B already sorted */
1302       PetscInt imark = -1;
1303       if (v) {
1304         *v = v_p = mat->rowvalues;
1305         for (i=0; i<nzB; i++) {
1306           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1307           else break;
1308         }
1309         imark = i;
1310         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1311         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1312       }
1313       if (idx) {
1314         *idx = idx_p = mat->rowindices;
1315         if (imark > -1) {
1316           for (i=0; i<imark; i++) {
1317             idx_p[i] = cmap[cworkB[i]];
1318           }
1319         } else {
1320           for (i=0; i<nzB; i++) {
1321             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1322             else break;
1323           }
1324           imark = i;
1325         }
1326         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1327         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1328       }
1329     } else {
1330       if (idx) *idx = 0;
1331       if (v)   *v   = 0;
1332     }
1333   }
1334   *nz = nztot;
1335   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1336   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1342 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1343 {
1344   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1345 
1346   PetscFunctionBegin;
1347   if (!aij->getrowactive) {
1348     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1349   }
1350   aij->getrowactive = PETSC_FALSE;
1351   PetscFunctionReturn(0);
1352 }
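
/*
   Hedged usage sketch (illustrative only, not compiled as part of this file): the
   MatGetRow_MPIAIJ()/MatRestoreRow_MPIAIJ() pair above is normally reached through the
   public MatGetRow()/MatRestoreRow() interface, one locally owned row at a time.  The
   variable names (A, rstart, rend, row, ncols, cols, vals) are assumptions for illustration.

     PetscInt    rstart,rend,row,ncols,*cols;
     PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       (read the ncols entries of cols[] and vals[] here; do not modify or keep them)
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/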
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatNorm_MPIAIJ"
1356 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1357 {
1358   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1359   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1360   PetscErrorCode ierr;
1361   PetscInt       i,j,cstart = mat->cmap.rstart;
1362   PetscReal      sum = 0.0;
1363   PetscScalar    *v;
1364 
1365   PetscFunctionBegin;
1366   if (aij->size == 1) {
1367     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1368   } else {
1369     if (type == NORM_FROBENIUS) {
1370       v = amat->a;
1371       for (i=0; i<amat->nz; i++) {
1372 #if defined(PETSC_USE_COMPLEX)
1373         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1374 #else
1375         sum += (*v)*(*v); v++;
1376 #endif
1377       }
1378       v = bmat->a;
1379       for (i=0; i<bmat->nz; i++) {
1380 #if defined(PETSC_USE_COMPLEX)
1381         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1382 #else
1383         sum += (*v)*(*v); v++;
1384 #endif
1385       }
1386       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1387       *norm = sqrt(*norm);
1388     } else if (type == NORM_1) { /* max column norm */
1389       PetscReal *tmp,*tmp2;
1390       PetscInt    *jj,*garray = aij->garray;
1391       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1392       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1393       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1394       *norm = 0.0;
1395       v = amat->a; jj = amat->j;
1396       for (j=0; j<amat->nz; j++) {
1397         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1398       }
1399       v = bmat->a; jj = bmat->j;
1400       for (j=0; j<bmat->nz; j++) {
1401         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1402       }
1403       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1404       for (j=0; j<mat->cmap.N; j++) {
1405         if (tmp2[j] > *norm) *norm = tmp2[j];
1406       }
1407       ierr = PetscFree(tmp);CHKERRQ(ierr);
1408       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1409     } else if (type == NORM_INFINITY) { /* max row norm */
1410       PetscReal ntemp = 0.0;
1411       for (j=0; j<aij->A->rmap.n; j++) {
1412         v = amat->a + amat->i[j];
1413         sum = 0.0;
1414         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1415           sum += PetscAbsScalar(*v); v++;
1416         }
1417         v = bmat->a + bmat->i[j];
1418         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1419           sum += PetscAbsScalar(*v); v++;
1420         }
1421         if (sum > ntemp) ntemp = sum;
1422       }
1423       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1424     } else {
1425       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1426     }
1427   }
1428   PetscFunctionReturn(0);
1429 }
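
/*
   Hedged usage sketch (illustrative only, not compiled here): MatNorm_MPIAIJ() above is
   reached through MatNorm() with one of the supported norm types; the 2-norm is not
   supported for this format, as the error branch above indicates.  A is an assumed,
   already assembled MPIAIJ matrix.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/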
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatTranspose_MPIAIJ"
1433 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1434 {
1435   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1436   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1437   PetscErrorCode ierr;
1438   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1439   Mat            B;
1440   PetscScalar    *array;
1441 
1442   PetscFunctionBegin;
1443   if (!matout && M != N) {
1444     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1445   }
1446 
1447   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1448   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1449   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1450   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1451 
1452   /* copy over the A part */
1453   Aloc = (Mat_SeqAIJ*)a->A->data;
1454   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1455   row = A->rmap.rstart;
1456   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
1457   for (i=0; i<m; i++) {
1458     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1459     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1460   }
1461   aj = Aloc->j;
1462   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}
1463 
1464   /* copy over the B part */
1465   Aloc = (Mat_SeqAIJ*)a->B->data;
1466   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1467   row  = A->rmap.rstart;
1468   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1469   ct   = cols;
1470   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1471   for (i=0; i<m; i++) {
1472     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1473     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1474   }
1475   ierr = PetscFree(ct);CHKERRQ(ierr);
1476   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1477   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1478   if (matout) {
1479     *matout = B;
1480   } else {
1481     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1482   }
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1488 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1489 {
1490   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1491   Mat            a = aij->A,b = aij->B;
1492   PetscErrorCode ierr;
1493   PetscInt       s1,s2,s3;
1494 
1495   PetscFunctionBegin;
1496   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1497   if (rr) {
1498     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1499     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1500     /* Overlap communication with computation. */
1501     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1502   }
1503   if (ll) {
1504     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1505     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1506     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1507   }
1508   /* scale the diagonal block */
1509   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1510 
1511   if (rr) {
1512     /* Do a scatter end and then right scale the off-diagonal block */
1513     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1514     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1515   }
1516 
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1522 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1523 {
1524   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1529   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1534 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1535 {
1536   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1537   PetscErrorCode ierr;
1538 
1539   PetscFunctionBegin;
1540   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatEqual_MPIAIJ"
1546 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1547 {
1548   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1549   Mat            a,b,c,d;
1550   PetscTruth     flg;
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   a = matA->A; b = matA->B;
1555   c = matB->A; d = matB->B;
1556 
1557   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1558   if (flg) {
1559     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1560   }
1561   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatCopy_MPIAIJ"
1567 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1568 {
1569   PetscErrorCode ierr;
1570   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1571   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1572 
1573   PetscFunctionBegin;
1574   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1575   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1576     /* Because of the column compression in the off-processor part of the matrix a->B,
1577        the number of columns in a->B and b->B may be different, hence we cannot call
1578        MatCopy() directly on the two parts. If need be, a copy more efficient than
1579        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1580        then copying the submatrices */
1581     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1582   } else {
1583     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1584     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1585   }
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1591 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1592 {
1593   PetscErrorCode ierr;
1594 
1595   PetscFunctionBegin;
1596   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #include "petscblaslapack.h"
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatAXPY_MPIAIJ"
1603 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1604 {
1605   PetscErrorCode ierr;
1606   PetscInt       i;
1607   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1608   PetscBLASInt   bnz,one=1;
1609   Mat_SeqAIJ     *x,*y;
1610 
1611   PetscFunctionBegin;
1612   if (str == SAME_NONZERO_PATTERN) {
1613     PetscScalar alpha = a;
1614     x = (Mat_SeqAIJ *)xx->A->data;
1615     y = (Mat_SeqAIJ *)yy->A->data;
1616     bnz = (PetscBLASInt)x->nz;
1617     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1618     x = (Mat_SeqAIJ *)xx->B->data;
1619     y = (Mat_SeqAIJ *)yy->B->data;
1620     bnz = (PetscBLASInt)x->nz;
1621     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1622   } else if (str == SUBSET_NONZERO_PATTERN) {
1623     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1624 
1625     x = (Mat_SeqAIJ *)xx->B->data;
1626     y = (Mat_SeqAIJ *)yy->B->data;
1627     if (y->xtoy && y->XtoY != xx->B) {
1628       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1629       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1630     }
1631     if (!y->xtoy) { /* get xtoy */
1632       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1633       y->XtoY = xx->B;
1634     }
1635     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1636   } else {
1637     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1638   }
1639   PetscFunctionReturn(0);
1640 }
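
/*
   Hedged usage sketch (illustrative only, not compiled here): MatAXPY_MPIAIJ() above
   computes Y = a*X + Y and is reached through MatAXPY(); the MatStructure flag selects
   the fast BLAS path (SAME_NONZERO_PATTERN), the xtoy-mapping path
   (SUBSET_NONZERO_PATTERN), or the generic MatAXPY_Basic() fallback.  X and Y are
   assumed, already assembled MPIAIJ matrices of the same sizes.

     PetscScalar alpha = 2.0;
     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/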
1641 
1642 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1643 
1644 #undef __FUNCT__
1645 #define __FUNCT__ "MatConjugate_MPIAIJ"
1646 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1647 {
1648 #if defined(PETSC_USE_COMPLEX)
1649   PetscErrorCode ierr;
1650   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1651 
1652   PetscFunctionBegin;
1653   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1654   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1655 #else
1656   PetscFunctionBegin;
1657 #endif
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatRealPart_MPIAIJ"
1663 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1664 {
1665   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1666   PetscErrorCode ierr;
1667 
1668   PetscFunctionBegin;
1669   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1670   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1676 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1677 {
1678   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1679   PetscErrorCode ierr;
1680 
1681   PetscFunctionBegin;
1682   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1683   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 #ifdef PETSC_HAVE_PBGL
1688 #include <boost/parallel/mpi/bsp_process_group.hpp>
1689 typedef boost::parallel::mpi::bsp_process_group            process_group_type;
1690 
1691 #include <boost/graph/distributed/adjacency_list.hpp>
1692 #include <boost/parallel/mpi/bsp_process_group.hpp>
1693 
1694 #include <boost/graph/distributed/petsc/interface.hpp>
1695 #include <boost/graph/distributed/ilu_0.hpp>
1696 
1697 namespace petsc = boost::distributed::petsc;
1698 using namespace std;
1699 typedef double                                             value_type;
1700 typedef boost::graph::distributed::ilu_elimination_state   elimination_state;
1701 typedef boost::adjacency_list<boost::listS,
1702                        boost::distributedS<process_group_type, boost::vecS>,
1703                        boost::bidirectionalS,
1704                        // Vertex properties
1705                        boost::no_property,
1706                        // Edge properties
1707                        boost::property<boost::edge_weight_t, value_type,
1708                          boost::property<boost::edge_finished_t, elimination_state> > > graph_type;
1709 
1710 typedef boost::graph_traits<graph_type>::vertex_descriptor        vertex_type;
1711 typedef boost::graph_traits<graph_type>::edge_descriptor          edge_type;
1712 typedef boost::property_map<graph_type, boost::edge_weight_t>::type      weight_map_type;
1713 
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1716 /*
1717   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1718 */
1719 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1720 {
1721   PetscTruth           row_identity, col_identity;
1722   PetscObjectContainer c;
1723   PetscInt             m, n, M, N;
1724   PetscErrorCode       ierr;
1725 
1726   PetscFunctionBegin;
1727   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1728   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1729   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1730   if (!row_identity || !col_identity) {
1731     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1732   }
1733 
1734   process_group_type pg;
1735   graph_type*        graph_p = new graph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1736   graph_type&        graph   = *graph_p;
1737   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1738 
1739   //write_graphviz("petsc_matrix_as_graph.dot", graph, default_writer(), matrix_graph_writer<graph_type>(graph));
1740   boost::property_map<graph_type, boost::edge_finished_t>::type finished = get(boost::edge_finished, graph);
1741   BGL_FORALL_EDGES(e, graph, graph_type)
1742     put(finished, e, boost::graph::distributed::unseen);
1743 
1744   ilu_0(graph, get(boost::edge_weight, graph), get(boost::edge_finished, graph));
1745 
1746   /* put together the new matrix */
1747   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1748   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1749   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1750   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1751   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1752   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1753   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1754   (*fact)->factor = FACTOR_LU;
1755 
1756   ierr = PetscObjectContainerCreate(A->comm, &c);CHKERRQ(ierr);
1757   ierr = PetscObjectContainerSetPointer(c, graph_p);CHKERRQ(ierr);
1758   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 #undef __FUNCT__
1763 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1764 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1765 {
1766   PetscFunctionBegin;
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 #undef __FUNCT__
1771 #define __FUNCT__ "MatSolve_MPIAIJ"
1772 /*
1773   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1774 */
1775 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1776 {
1777   graph_type*          graph_p;
1778   PetscObjectContainer c;
1779   PetscErrorCode       ierr;
1780 
1781   PetscFunctionBegin;
1782   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1783   ierr = PetscObjectContainerGetPointer(c, (void **) &graph_p);CHKERRQ(ierr);
1784   ierr = VecCopy(b, x); CHKERRQ(ierr);
1785   PetscFunctionReturn(0);
1786 }
1787 #endif
1788 
1789 /* -------------------------------------------------------------------*/
1790 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1791        MatGetRow_MPIAIJ,
1792        MatRestoreRow_MPIAIJ,
1793        MatMult_MPIAIJ,
1794 /* 4*/ MatMultAdd_MPIAIJ,
1795        MatMultTranspose_MPIAIJ,
1796        MatMultTransposeAdd_MPIAIJ,
1797 #ifdef PETSC_HAVE_PBGL
1798        MatSolve_MPIAIJ,
1799 #else
1800        0,
1801 #endif
1802        0,
1803        0,
1804 /*10*/ 0,
1805        0,
1806        0,
1807        MatRelax_MPIAIJ,
1808        MatTranspose_MPIAIJ,
1809 /*15*/ MatGetInfo_MPIAIJ,
1810        MatEqual_MPIAIJ,
1811        MatGetDiagonal_MPIAIJ,
1812        MatDiagonalScale_MPIAIJ,
1813        MatNorm_MPIAIJ,
1814 /*20*/ MatAssemblyBegin_MPIAIJ,
1815        MatAssemblyEnd_MPIAIJ,
1816        0,
1817        MatSetOption_MPIAIJ,
1818        MatZeroEntries_MPIAIJ,
1819 /*25*/ MatZeroRows_MPIAIJ,
1820        0,
1821 #ifdef PETSC_HAVE_PBGL
1822        MatLUFactorNumeric_MPIAIJ,
1823 #else
1824        0,
1825 #endif
1826        0,
1827        0,
1828 /*30*/ MatSetUpPreallocation_MPIAIJ,
1829 #ifdef PETSC_HAVE_PBGL
1830        MatILUFactorSymbolic_MPIAIJ,
1831 #else
1832        0,
1833 #endif
1834        0,
1835        0,
1836        0,
1837 /*35*/ MatDuplicate_MPIAIJ,
1838        0,
1839        0,
1840        0,
1841        0,
1842 /*40*/ MatAXPY_MPIAIJ,
1843        MatGetSubMatrices_MPIAIJ,
1844        MatIncreaseOverlap_MPIAIJ,
1845        MatGetValues_MPIAIJ,
1846        MatCopy_MPIAIJ,
1847 /*45*/ 0,
1848        MatScale_MPIAIJ,
1849        0,
1850        0,
1851        0,
1852 /*50*/ MatSetBlockSize_MPIAIJ,
1853        0,
1854        0,
1855        0,
1856        0,
1857 /*55*/ MatFDColoringCreate_MPIAIJ,
1858        0,
1859        MatSetUnfactored_MPIAIJ,
1860        MatPermute_MPIAIJ,
1861        0,
1862 /*60*/ MatGetSubMatrix_MPIAIJ,
1863        MatDestroy_MPIAIJ,
1864        MatView_MPIAIJ,
1865        0,
1866        0,
1867 /*65*/ 0,
1868        0,
1869        0,
1870        0,
1871        0,
1872 /*70*/ 0,
1873        0,
1874        MatSetColoring_MPIAIJ,
1875 #if defined(PETSC_HAVE_ADIC)
1876        MatSetValuesAdic_MPIAIJ,
1877 #else
1878        0,
1879 #endif
1880        MatSetValuesAdifor_MPIAIJ,
1881 /*75*/ 0,
1882        0,
1883        0,
1884        0,
1885        0,
1886 /*80*/ 0,
1887        0,
1888        0,
1889        0,
1890 /*84*/ MatLoad_MPIAIJ,
1891        0,
1892        0,
1893        0,
1894        0,
1895        0,
1896 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1897        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1898        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1899        MatPtAP_Basic,
1900        MatPtAPSymbolic_MPIAIJ,
1901 /*95*/ MatPtAPNumeric_MPIAIJ,
1902        0,
1903        0,
1904        0,
1905        0,
1906 /*100*/0,
1907        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1908        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1909        MatConjugate_MPIAIJ,
1910        0,
1911 /*105*/MatSetValuesRow_MPIAIJ,
1912        MatRealPart_MPIAIJ,
1913        MatImaginaryPart_MPIAIJ};
1914 
1915 /* ----------------------------------------------------------------------------------------*/
1916 
1917 EXTERN_C_BEGIN
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1920 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1921 {
1922   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1923   PetscErrorCode ierr;
1924 
1925   PetscFunctionBegin;
1926   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1927   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 EXTERN_C_END
1931 
1932 EXTERN_C_BEGIN
1933 #undef __FUNCT__
1934 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1935 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1936 {
1937   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1938   PetscErrorCode ierr;
1939 
1940   PetscFunctionBegin;
1941   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1942   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1943   PetscFunctionReturn(0);
1944 }
1945 EXTERN_C_END
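
/*
   Hedged usage sketch (illustrative only, not compiled here): MatStoreValues_MPIAIJ() and
   MatRetrieveValues_MPIAIJ() above simply delegate to the sequential routines on the
   diagonal and off-diagonal blocks.  They are reached through MatStoreValues() and
   MatRetrieveValues(), and are intended for matrices whose nonzero structure is frozen,
   e.g. after MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS); A is an assumed assembled matrix.

     ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);
     (change numerical values without creating new nonzero locations)
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/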
1946 
1947 #include "petscpc.h"
1948 EXTERN_C_BEGIN
1949 #undef __FUNCT__
1950 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1951 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1952 {
1953   Mat_MPIAIJ     *b;
1954   PetscErrorCode ierr;
1955   PetscInt       i;
1956 
1957   PetscFunctionBegin;
1958   B->preallocated = PETSC_TRUE;
1959   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1960   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1961   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1962   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1963 
1964   B->rmap.bs = B->cmap.bs = 1;
1965   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1966   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1967   if (d_nnz) {
1968     for (i=0; i<B->rmap.n; i++) {
1969       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1970     }
1971   }
1972   if (o_nnz) {
1973     for (i=0; i<B->rmap.n; i++) {
1974       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1975     }
1976   }
1977   b = (Mat_MPIAIJ*)B->data;
1978 
1979   /* Explicitly create 2 MATSEQAIJ matrices. */
1980   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1981   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1982   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1983   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1984   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1985   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1986   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1987   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1988 
1989   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1990   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1991 
1992   PetscFunctionReturn(0);
1993 }
1994 EXTERN_C_END
1995 
1996 #undef __FUNCT__
1997 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1998 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1999 {
2000   Mat            mat;
2001   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2002   PetscErrorCode ierr;
2003 
2004   PetscFunctionBegin;
2005   *newmat       = 0;
2006   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2007   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2008   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2009   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2010   a    = (Mat_MPIAIJ*)mat->data;
2011 
2012   mat->factor       = matin->factor;
2013   mat->rmap.bs      = matin->rmap.bs;
2014   mat->assembled    = PETSC_TRUE;
2015   mat->insertmode   = NOT_SET_VALUES;
2016   mat->preallocated = PETSC_TRUE;
2017 
2018   a->size           = oldmat->size;
2019   a->rank           = oldmat->rank;
2020   a->donotstash     = oldmat->donotstash;
2021   a->roworiented    = oldmat->roworiented;
2022   a->rowindices     = 0;
2023   a->rowvalues      = 0;
2024   a->getrowactive   = PETSC_FALSE;
2025 
2026   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2027   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2028 
2029   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2030   if (oldmat->colmap) {
2031 #if defined (PETSC_USE_CTABLE)
2032     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2033 #else
2034     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2035     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2036     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2037 #endif
2038   } else a->colmap = 0;
2039   if (oldmat->garray) {
2040     PetscInt len;
2041     len  = oldmat->B->cmap.n;
2042     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2043     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2044     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2045   } else a->garray = 0;
2046 
2047   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2048   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2049   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2050   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2051   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2052   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2053   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2054   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2055   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2056   *newmat = mat;
2057   PetscFunctionReturn(0);
2058 }
2059 
2060 #include "petscsys.h"
2061 
2062 #undef __FUNCT__
2063 #define __FUNCT__ "MatLoad_MPIAIJ"
2064 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2065 {
2066   Mat            A;
2067   PetscScalar    *vals,*svals;
2068   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2069   MPI_Status     status;
2070   PetscErrorCode ierr;
2071   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2072   PetscInt       i,nz,j,rstart,rend,mmax;
2073   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2074   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2075   PetscInt       cend,cstart,n,*rowners;
2076   int            fd;
2077 
2078   PetscFunctionBegin;
2079   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2080   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2081   if (!rank) {
2082     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2083     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2084     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2085   }
2086 
2087   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2088   M = header[1]; N = header[2];
2089   /* determine ownership of all rows */
2090   m    = M/size + ((M % size) > rank);
2091   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2092   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2093 
2094   /* First process needs enough room for process with most rows */
2095   if (!rank) {
2096     mmax       = rowners[1];
2097     for (i=2; i<size; i++) {
2098       mmax = PetscMax(mmax,rowners[i]);
2099     }
2100   } else mmax = m;
2101 
2102   rowners[0] = 0;
2103   for (i=2; i<=size; i++) {
2104     mmax       = PetscMax(mmax,rowners[i]);
2105     rowners[i] += rowners[i-1];
2106   }
2107   rstart = rowners[rank];
2108   rend   = rowners[rank+1];
2109 
2110   /* distribute row lengths to all processors */
2111   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2112   if (!rank) {
2113     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2114     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2115     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2116     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2117     for (j=0; j<m; j++) {
2118       procsnz[0] += ourlens[j];
2119     }
2120     for (i=1; i<size; i++) {
2121       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2122       /* calculate the number of nonzeros on each processor */
2123       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2124         procsnz[i] += rowlengths[j];
2125       }
2126       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2127     }
2128     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2129   } else {
2130     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2131   }
2132 
2133   if (!rank) {
2134     /* determine max buffer needed and allocate it */
2135     maxnz = 0;
2136     for (i=0; i<size; i++) {
2137       maxnz = PetscMax(maxnz,procsnz[i]);
2138     }
2139     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2140 
2141     /* read in my part of the matrix column indices  */
2142     nz   = procsnz[0];
2143     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2144     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2145 
2146     /* read in everyone else's part and ship it off */
2147     for (i=1; i<size; i++) {
2148       nz   = procsnz[i];
2149       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2150       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2151     }
2152     ierr = PetscFree(cols);CHKERRQ(ierr);
2153   } else {
2154     /* determine buffer space needed for message */
2155     nz = 0;
2156     for (i=0; i<m; i++) {
2157       nz += ourlens[i];
2158     }
2159     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2160 
2161     /* receive message of column indices */
2162     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2163     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2164     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2165   }
2166 
2167   /* determine column ownership if matrix is not square */
2168   if (N != M) {
2169     n      = N/size + ((N % size) > rank);
2170     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2171     cstart = cend - n;
2172   } else {
2173     cstart = rstart;
2174     cend   = rend;
2175     n      = cend - cstart;
2176   }
2177 
2178   /* loop over local rows, determining number of off diagonal entries */
2179   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2180   jj = 0;
2181   for (i=0; i<m; i++) {
2182     for (j=0; j<ourlens[i]; j++) {
2183       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2184       jj++;
2185     }
2186   }
2187 
2188   /* create our matrix */
2189   for (i=0; i<m; i++) {
2190     ourlens[i] -= offlens[i];
2191   }
2192   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2193   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2194   ierr = MatSetType(A,type);CHKERRQ(ierr);
2195   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2196 
2197   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2198   for (i=0; i<m; i++) {
2199     ourlens[i] += offlens[i];
2200   }
2201 
2202   if (!rank) {
2203     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2204 
2205     /* read in my part of the matrix numerical values  */
2206     nz   = procsnz[0];
2207     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2208 
2209     /* insert into matrix */
2210     jj      = rstart;
2211     smycols = mycols;
2212     svals   = vals;
2213     for (i=0; i<m; i++) {
2214       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2215       smycols += ourlens[i];
2216       svals   += ourlens[i];
2217       jj++;
2218     }
2219 
2220     /* read in other processors and ship out */
2221     for (i=1; i<size; i++) {
2222       nz   = procsnz[i];
2223       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2224       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2225     }
2226     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2227   } else {
2228     /* receive numeric values */
2229     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2230 
2231     /* receive message of values */
2232     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2233     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2234     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2235 
2236     /* insert into matrix */
2237     jj      = rstart;
2238     smycols = mycols;
2239     svals   = vals;
2240     for (i=0; i<m; i++) {
2241       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2242       smycols += ourlens[i];
2243       svals   += ourlens[i];
2244       jj++;
2245     }
2246   }
2247   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2248   ierr = PetscFree(vals);CHKERRQ(ierr);
2249   ierr = PetscFree(mycols);CHKERRQ(ierr);
2250   ierr = PetscFree(rowners);CHKERRQ(ierr);
2251 
2252   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2253   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2254   *newmat = A;
2255   PetscFunctionReturn(0);
2256 }
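
/*
   Hedged usage sketch (illustrative only; the file name is hypothetical): MatLoad_MPIAIJ()
   above is reached through MatLoad() with a binary viewer; rank 0 reads the file and
   distributes the rows to the other processes as implemented above.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/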
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2260 /*
2261     Not great since it makes two copies of the submatrix: first a SeqAIJ
2262   in local form, and then the end result by concatenating the local matrices.
2263   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2264 */
2265 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2266 {
2267   PetscErrorCode ierr;
2268   PetscMPIInt    rank,size;
2269   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2270   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2271   Mat            *local,M,Mreuse;
2272   PetscScalar    *vwork,*aa;
2273   MPI_Comm       comm = mat->comm;
2274   Mat_SeqAIJ     *aij;
2275 
2276 
2277   PetscFunctionBegin;
2278   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2279   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2280 
2281   if (call ==  MAT_REUSE_MATRIX) {
2282     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2283     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2284     local = &Mreuse;
2285     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2286   } else {
2287     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2288     Mreuse = *local;
2289     ierr   = PetscFree(local);CHKERRQ(ierr);
2290   }
2291 
2292   /*
2293       m - number of local rows
2294       n - number of columns (same on all processors)
2295       rstart - first row in new global matrix generated
2296   */
2297   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2298   if (call == MAT_INITIAL_MATRIX) {
2299     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2300     ii  = aij->i;
2301     jj  = aij->j;
2302 
2303     /*
2304         Determine the number of non-zeros in the diagonal and off-diagonal
2305         portions of the matrix in order to do correct preallocation
2306     */
2307 
2308     /* first get start and end of "diagonal" columns */
2309     if (csize == PETSC_DECIDE) {
2310       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2311       if (mglobal == n) { /* square matrix */
2312         nlocal = m;
2313       } else {
2314         nlocal = n/size + ((n % size) > rank);
2315       }
2316     } else {
2317       nlocal = csize;
2318     }
2319     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2320     rstart = rend - nlocal;
2321     if (rank == size - 1 && rend != n) {
2322       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2323     }
2324 
2325     /* next, compute all the lengths */
2326     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2327     olens = dlens + m;
2328     for (i=0; i<m; i++) {
2329       jend = ii[i+1] - ii[i];
2330       olen = 0;
2331       dlen = 0;
2332       for (j=0; j<jend; j++) {
2333         if (*jj < rstart || *jj >= rend) olen++;
2334         else dlen++;
2335         jj++;
2336       }
2337       olens[i] = olen;
2338       dlens[i] = dlen;
2339     }
2340     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2341     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2342     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2343     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2344     ierr = PetscFree(dlens);CHKERRQ(ierr);
2345   } else {
2346     PetscInt ml,nl;
2347 
2348     M = *newmat;
2349     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2350     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2351     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2352     /*
2353          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2354        rather than the slower MatSetValues().
2355     */
2356     M->was_assembled = PETSC_TRUE;
2357     M->assembled     = PETSC_FALSE;
2358   }
2359   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2360   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2361   ii  = aij->i;
2362   jj  = aij->j;
2363   aa  = aij->a;
2364   for (i=0; i<m; i++) {
2365     row   = rstart + i;
2366     nz    = ii[i+1] - ii[i];
2367     cwork = jj;     jj += nz;
2368     vwork = aa;     aa += nz;
2369     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2370   }
2371 
2372   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2373   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2374   *newmat = M;
2375 
2376   /* save submatrix used in processor for next request */
2377   if (call ==  MAT_INITIAL_MATRIX) {
2378     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2379     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2380   }
2381 
2382   PetscFunctionReturn(0);
2383 }
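
/*
   Hedged usage sketch (illustrative only, not compiled here): MatGetSubMatrix_MPIAIJ()
   above is reached through MatGetSubMatrix(), which every process must call with index
   sets of global row and column numbers.  The index sets below (each process keeps its
   own rows and all columns) are an assumption chosen purely for illustration.

     PetscInt rstart,rend,N;
     IS       isrow,iscol;
     Mat      sub;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetSize(A,PETSC_NULL,&N);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,N,0,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/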
2384 
2385 EXTERN_C_BEGIN
2386 #undef __FUNCT__
2387 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2388 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2389 {
2390   PetscInt       m,cstart, cend,j,nnz,i,d;
2391   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2392   const PetscInt *JJ;
2393   PetscScalar    *values;
2394   PetscErrorCode ierr;
2395 
2396   PetscFunctionBegin;
2397   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
2398 
2399   B->rmap.bs = B->cmap.bs = 1;
2400   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2401   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2402   m      = B->rmap.n;
2403   cstart = B->cmap.rstart;
2404   cend   = B->cmap.rend;
2405   rstart = B->rmap.rstart;
2406 
2407   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2408   o_nnz = d_nnz + m;
2409 
2410   for (i=0; i<m; i++) {
2411     nnz     = Ii[i+1]- Ii[i];
2412     JJ      = J + Ii[i];
2413     nnz_max = PetscMax(nnz_max,nnz);
2414     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2415     for (j=0; j<nnz; j++) {
2416       if (*JJ >= cstart) break;
2417       JJ++;
2418     }
2419     d = 0;
2420     for (; j<nnz; j++) {
2421       if (*JJ++ >= cend) break;
2422       d++;
2423     }
2424     d_nnz[i] = d;
2425     o_nnz[i] = nnz - d;
2426   }
2427   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2428   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2429 
2430   if (v) values = (PetscScalar*)v;
2431   else {
2432     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2433     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2434   }
2435 
2436   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2437   for (i=0; i<m; i++) {
2438     ii   = i + rstart;
2439     nnz  = Ii[i+1]- Ii[i];
2440     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2441   }
2442   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2443   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2444   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2445 
2446   if (!v) {
2447     ierr = PetscFree(values);CHKERRQ(ierr);
2448   }
2449   PetscFunctionReturn(0);
2450 }
2451 EXTERN_C_END
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2455 /*@
2456    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2457    (the default parallel PETSc format).
2458 
2459    Collective on MPI_Comm
2460 
2461    Input Parameters:
2462 +  B - the matrix
2463 .  i - the indices into j for the start of each local row (starts with zero)
2464 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2465 -  v - optional values in the matrix
2466 
2467    Level: developer
2468 
2469    Notes: this actually copies the values from i[], j[], and v[] to put them into PETSc's internal
2470      storage format. Thus changing the values in v[] after this call will not affect the matrix values.
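
   Example:
   A minimal sketch (the local CSR triplet is illustrative only; B is assumed to already
   have its type set to MPIAIJ and its local/global sizes set):
.vb
   PetscInt    i[] = {0,2,4};            row offsets for two local rows
   PetscInt    j[] = {0,5,1,6};          global column indices, sorted within each row
   PetscScalar v[] = {1.0,2.0,3.0,4.0};
   MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve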
2471 
2472 .keywords: matrix, aij, compressed row, sparse, parallel
2473 
2474 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2475           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2476 @*/
2477 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2478 {
2479   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2480 
2481   PetscFunctionBegin;
2482   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2483   if (f) {
2484     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2485   }
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 #undef __FUNCT__
2490 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2491 /*@C
2492    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2493    (the default parallel PETSc format).  For good matrix assembly performance
2494    the user should preallocate the matrix storage by setting the parameters
2495    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2496    performance can be increased by more than a factor of 50.
2497 
2498    Collective on MPI_Comm
2499 
2500    Input Parameters:
2501 +  A - the matrix
2502 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2503            (same value is used for all local rows)
2504 .  d_nnz - array containing the number of nonzeros in the various rows of the
2505            DIAGONAL portion of the local submatrix (possibly different for each row)
2506            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2507            The size of this array is equal to the number of local rows, i.e 'm'.
2508            You must leave room for the diagonal entry even if it is zero.
2509 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2510            submatrix (same value is used for all local rows).
2511 -  o_nnz - array containing the number of nonzeros in the various rows of the
2512            OFF-DIAGONAL portion of the local submatrix (possibly different for
2513            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2514            structure. The size of this array is equal to the number
2515            of local rows, i.e 'm'.
2516 
2517    If the *_nnz parameter is given then the *_nz parameter is ignored
2518 
2519    The AIJ format (also called the Yale sparse matrix format or
2520    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2521    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2522 
2523    The parallel matrix is partitioned such that the first m0 rows belong to
2524    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2525    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2526 
2527    The DIAGONAL portion of the local submatrix of a processor can be defined
2528    as the submatrix which is obtained by extracting the part corresponding
2529    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2530    first row that belongs to the processor, and r2 is the last row belonging
2531    to this processor. This is a square mxm matrix. The remaining portion
2532    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2533 
2534    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2535 
2536    Example usage:
2537 
2538    Consider the following 8x8 matrix with 34 non-zero values, that is
2539    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2540    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2541    as follows:
2542 
2543 .vb
2544             1  2  0  |  0  3  0  |  0  4
2545     Proc0   0  5  6  |  7  0  0  |  8  0
2546             9  0 10  | 11  0  0  | 12  0
2547     -------------------------------------
2548            13  0 14  | 15 16 17  |  0  0
2549     Proc1   0 18  0  | 19 20 21  |  0  0
2550             0  0  0  | 22 23  0  | 24  0
2551     -------------------------------------
2552     Proc2  25 26 27  |  0  0 28  | 29  0
2553            30  0  0  | 31 32 33  |  0 34
2554 .ve
2555 
2556    This can be represented as a collection of submatrices as:
2557 
2558 .vb
2559       A B C
2560       D E F
2561       G H I
2562 .ve
2563 
2564    Where the submatrices A,B,C are owned by proc0, D,E,F are
2565    owned by proc1, G,H,I are owned by proc2.
2566 
2567    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2568    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2569    The 'M','N' parameters are 8,8, and have the same values on all procs.
2570 
2571    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2572    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2573    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2574    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2575    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2576    matrix, and [DF] as another SeqAIJ matrix.
2577 
2578    When d_nz, o_nz parameters are specified, d_nz storage elements are
2579    allocated for every row of the local diagonal submatrix, and o_nz
2580    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2581    One way to choose d_nz and o_nz is to use the max nonzeros per local
2582    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2583    In this case, the values of d_nz,o_nz are:
2584 .vb
2585      proc0 : dnz = 2, o_nz = 2
2586      proc1 : dnz = 3, o_nz = 2
2587      proc2 : dnz = 1, o_nz = 4
2588 .ve
2589    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2590    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2591    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2592    34 values.
2593 
2594    When d_nnz, o_nnz parameters are specified, the storage is specified
2595    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2596    In the above case the values for d_nnz,o_nnz are:
2597 .vb
2598      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2599      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2600      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2601 .ve
2602    Here the space allocated is the sum of all the above values, i.e. 34, and
2603    hence pre-allocation is perfect.
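
   A sketch of the corresponding call (shown for proc1 of the layout above; B is assumed
   to already have its sizes set and type MPIAIJ):
.vb
   PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
   MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve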
2604 
2605    Level: intermediate
2606 
2607 .keywords: matrix, aij, compressed row, sparse, parallel
2608 
2609 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2610           MPIAIJ
2611 @*/
2612 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2613 {
2614   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2615 
2616   PetscFunctionBegin;
2617   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2618   if (f) {
2619     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2620   }
2621   PetscFunctionReturn(0);
2622 }
2623 
2624 #undef __FUNCT__
2625 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
2626 /*@C
2627      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
2628          CSR format the local rows.
2629 
2630    Collective on MPI_Comm
2631 
2632    Input Parameters:
2633 +  comm - MPI communicator
2634 .  m - number of local rows (Cannot be PETSC_DECIDE)
2635 .  n - This value should be the same as the local size used in creating the
2636        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2637        calculated if N is given). For square matrices n is almost always m.
2638 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2639 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2640 .   i - row indices
2641 .   j - column indices
2642 -   a - matrix values
2643 
2644    Output Parameter:
2645 .   mat - the matrix
2646 
2647    Level: intermediate
2648 
2649    Notes:
2650        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2651      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2652      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2653 
2654        The i and j indices are 0 based
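
   Example:
   A minimal sketch (the two local rows below are hypothetical, used only to show the
   argument order):
.vb
   PetscInt    i[] = {0,2,3};            row 0 has 2 entries, row 1 has 1 entry
   PetscInt    j[] = {0,3,1};            global column indices
   PetscScalar a[] = {1.0,2.0,3.0};
   MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DETERMINE,4,i,j,a,&A);
.ve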
2655 
2656 .keywords: matrix, aij, compressed row, sparse, parallel
2657 
2658 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2659           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
2660 @*/
2661 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2662 {
2663   PetscErrorCode ierr;
2664 
2665   PetscFunctionBegin;
2666   if (i[0]) {
2667     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2668   }
2669   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2670   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
       ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2671   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
2672   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
2673   PetscFunctionReturn(0);
2674 }
2675 
2676 #undef __FUNCT__
2677 #define __FUNCT__ "MatCreateMPIAIJ"
2678 /*@C
2679    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2680    (the default parallel PETSc format).  For good matrix assembly performance
2681    the user should preallocate the matrix storage by setting the parameters
2682    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2683    performance can be increased by more than a factor of 50.
2684 
2685    Collective on MPI_Comm
2686 
2687    Input Parameters:
2688 +  comm - MPI communicator
2689 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2690            This value should be the same as the local size used in creating the
2691            y vector for the matrix-vector product y = Ax.
2692 .  n - This value should be the same as the local size used in creating the
2693        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2694        calculated if N is given). For square matrices n is almost always m.
2695 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2696 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2697 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2698            (same value is used for all local rows)
2699 .  d_nnz - array containing the number of nonzeros in the various rows of the
2700            DIAGONAL portion of the local submatrix (possibly different for each row)
2701            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2702            The size of this array is equal to the number of local rows, i.e 'm'.
2703            You must leave room for the diagonal entry even if it is zero.
2704 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2705            submatrix (same value is used for all local rows).
2706 -  o_nnz - array containing the number of nonzeros in the various rows of the
2707            OFF-DIAGONAL portion of the local submatrix (possibly different for
2708            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2709            structure. The size of this array is equal to the number
2710            of local rows, i.e 'm'.
2711 
2712    Output Parameter:
2713 .  A - the matrix
2714 
2715    Notes:
2716    If the *_nnz parameter is given then the *_nz parameter is ignored
2717 
2718    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2719    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2720    storage requirements for this matrix.
2721 
2722    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2723    processor then it must be used on all processors that share the object for
2724    that argument.
2725 
2726    The user MUST specify either the local or global matrix dimensions
2727    (possibly both).
2728 
2729    The parallel matrix is partitioned such that the first m0 rows belong to
2730    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2731    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2732 
2733    The DIAGONAL portion of the local submatrix of a processor can be defined
2734    as the submatrix which is obtained by extracting the part corresponding
2735    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2736    first row that belongs to the processor, and r2 is the last row belonging
2737    to this processor. This is a square mxm matrix. The remaining portion
2738    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2739 
2740    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2741 
2742    When calling this routine with a single process communicator, a matrix of
2743    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2744    type of communicator, use the construction mechanism:
2745      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2746 
2747    By default, this format uses inodes (identical nodes) when possible.
2748    We search for consecutive rows with the same nonzero structure, thereby
2749    reusing matrix information to achieve increased efficiency.
2750 
2751    Options Database Keys:
2752 +  -mat_no_inode  - Do not use inodes
2753 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2754 -  -mat_aij_oneindex - Internally use indexing starting at 1
2755         rather than 0.  Note that when calling MatSetValues(),
2756         the user still MUST index entries starting at 0!
2757 
2758 
2759    Example usage:
2760 
2761    Consider the following 8x8 matrix with 34 nonzero values that is
2762    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
2763    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2764    as follows:
2765 
2766 .vb
2767             1  2  0  |  0  3  0  |  0  4
2768     Proc0   0  5  6  |  7  0  0  |  8  0
2769             9  0 10  | 11  0  0  | 12  0
2770     -------------------------------------
2771            13  0 14  | 15 16 17  |  0  0
2772     Proc1   0 18  0  | 19 20 21  |  0  0
2773             0  0  0  | 22 23  0  | 24  0
2774     -------------------------------------
2775     Proc2  25 26 27  |  0  0 28  | 29  0
2776            30  0  0  | 31 32 33  |  0 34
2777 .ve
2778 
2779    This can be represented as a collection of submatrices as:
2780 
2781 .vb
2782       A B C
2783       D E F
2784       G H I
2785 .ve
2786 
2787    Where the submatrices A,B,C are owned by proc0, D,E,F are
2788    owned by proc1, G,H,I are owned by proc2.
2789 
2790    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2791    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2792    The 'M','N' parameters are 8,8, and have the same values on all procs.
2793 
2794    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2795    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2796    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2797    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2798    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2799    matrix, and [DF] as another SeqAIJ matrix.
2800 
2801    When d_nz, o_nz parameters are specified, d_nz storage elements are
2802    allocated for every row of the local diagonal submatrix, and o_nz
2803    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2804    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2805    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2806    In this case, the values of d_nz,o_nz are:
2807 .vb
2808      proc0 : d_nz = 2, o_nz = 2
2809      proc1 : d_nz = 3, o_nz = 2
2810      proc2 : d_nz = 1, o_nz = 4
2811 .ve
2812    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2813    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2814    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2815    34 values.
2816 
2817    When d_nnz, o_nnz parameters are specified, the storage is specified
2818    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2819    In the above case the values for d_nnz,o_nnz are:
2820 .vb
2821      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2822      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2823      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2824 .ve
2825    Here the space allocated is the sum of all the above values, i.e., 34, and
2826    hence the pre-allocation is perfect.
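
   For instance, on proc1 in the example above the call might look like the following
   sketch (illustrative only; error checking omitted; d_nz and o_nz are passed as 0
   because the *_nnz arrays take precedence):
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     Mat      A;
     MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
     /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve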
2827 
2828    Level: intermediate
2829 
2830 .keywords: matrix, aij, compressed row, sparse, parallel
2831 
2832 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2833           MPIAIJ, MatCreateMPIAIJWithArrays()
2834 @*/
2835 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2836 {
2837   PetscErrorCode ierr;
2838   PetscMPIInt    size;
2839 
2840   PetscFunctionBegin;
2841   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2842   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2843   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2844   if (size > 1) {
2845     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2846     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2847   } else {
2848     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2849     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2850   }
2851   PetscFunctionReturn(0);
2852 }
2853 
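/*
   MatMPIAIJGetSeqAIJ - gives access to the internal SeqAIJ blocks of an MPIAIJ matrix:
   Ad is the diagonal block, Ao the off-diagonal block, and colmap[] contains the global
   column number of each local column of Ao.  The internal objects are returned directly;
   no copies are made.
*/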
2854 #undef __FUNCT__
2855 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2856 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2857 {
2858   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2859 
2860   PetscFunctionBegin;
2861   *Ad     = a->A;
2862   *Ao     = a->B;
2863   *colmap = a->garray;
2864   PetscFunctionReturn(0);
2865 }
2866 
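/*
   MatSetColoring_MPIAIJ - applies an ISColoring to an MPIAIJ matrix by setting the
   coloring separately on the diagonal (a->A) and off-diagonal (a->B) SeqAIJ blocks.
   For IS_COLORING_GLOBAL the colors for the off-diagonal block are gathered from all
   processes; for IS_COLORING_GHOSTED the global indices are mapped to local ones
   through A->mapping.
*/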
2867 #undef __FUNCT__
2868 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2869 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2870 {
2871   PetscErrorCode ierr;
2872   PetscInt       i;
2873   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2874 
2875   PetscFunctionBegin;
2876   if (coloring->ctype == IS_COLORING_GLOBAL) {
2877     ISColoringValue *allcolors,*colors;
2878     ISColoring      ocoloring;
2879 
2880     /* set coloring for diagonal portion */
2881     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2882 
2883     /* set coloring for off-diagonal portion */
2884     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2885     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2886     for (i=0; i<a->B->cmap.n; i++) {
2887       colors[i] = allcolors[a->garray[i]];
2888     }
2889     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2890     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2891     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2892     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2893   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2894     ISColoringValue *colors;
2895     PetscInt        *larray;
2896     ISColoring      ocoloring;
2897 
2898     /* set coloring for diagonal portion */
2899     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2900     for (i=0; i<a->A->cmap.n; i++) {
2901       larray[i] = i + A->cmap.rstart;
2902     }
2903     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2904     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2905     for (i=0; i<a->A->cmap.n; i++) {
2906       colors[i] = coloring->colors[larray[i]];
2907     }
2908     ierr = PetscFree(larray);CHKERRQ(ierr);
2909     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2910     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2911     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2912 
2913     /* set coloring for off-diagonal portion */
2914     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2915     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2916     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2917     for (i=0; i<a->B->cmap.n; i++) {
2918       colors[i] = coloring->colors[larray[i]];
2919     }
2920     ierr = PetscFree(larray);CHKERRQ(ierr);
2921     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2922     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2923     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2924   } else {
2925     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2926   }
2927 
2928   PetscFunctionReturn(0);
2929 }
2930 
2931 #if defined(PETSC_HAVE_ADIC)
2932 #undef __FUNCT__
2933 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2934 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2935 {
2936   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2937   PetscErrorCode ierr;
2938 
2939   PetscFunctionBegin;
2940   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2941   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2942   PetscFunctionReturn(0);
2943 }
2944 #endif
2945 
2946 #undef __FUNCT__
2947 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2948 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2949 {
2950   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2951   PetscErrorCode ierr;
2952 
2953   PetscFunctionBegin;
2954   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2955   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2956   PetscFunctionReturn(0);
2957 }
2958 
2959 #undef __FUNCT__
2960 #define __FUNCT__ "MatMerge"
2961 /*@C
2962       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2963                  matrices from each processor
2964 
2965     Collective on MPI_Comm
2966 
2967    Input Parameters:
2968 +    comm - the communicator the parallel matrix will live on
2969 .    inmat - the input sequential matrix (one per process)
2970 .    n - number of local columns (or PETSC_DECIDE)
2971 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2972 
2973    Output Parameter:
2974 .    outmat - the parallel matrix generated
2975 
2976     Level: advanced
2977 
2978    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2979 
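   A minimal usage sketch (illustrative only; error checking omitted; seq stands for
   the sequential matrix assembled by each process). Note that the input matrix is
   destroyed by this routine:
.vb
     Mat merged;
     MatMerge(PETSC_COMM_WORLD,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);
     /* seq has been destroyed; only merged may be used from here on */
.ve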
2980 @*/
2981 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2982 {
2983   PetscErrorCode ierr;
2984   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
2985   PetscInt       *indx;
2986   PetscScalar    *values;
2987 
2988   PetscFunctionBegin;
2989   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2990   if (scall == MAT_INITIAL_MATRIX){
2991     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2992     if (n == PETSC_DECIDE){
2993       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
2994     }
2995     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2996     rstart -= m;
2997 
2998     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2999     for (i=0;i<m;i++) {
3000       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3001       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3002       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3003     }
3004     /* This routine will ONLY return MPIAIJ type matrix */
3005     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3006     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3007     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3008     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3009     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3010 
3011   } else if (scall == MAT_REUSE_MATRIX){
3012     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3013   } else {
3014     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3015   }
3016 
3017   for (i=0;i<m;i++) {
3018     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3019     Ii    = i + rstart;
3020     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3021     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3022   }
3023   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3024   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3025   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3026 
3027   PetscFunctionReturn(0);
3028 }
3029 
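/*
   MatFileSplit - each process copies its local rows of the parallel matrix A into a
   sequential matrix and appends it to the binary file <outfile>.<rank>, producing one
   file per process.
*/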
3030 #undef __FUNCT__
3031 #define __FUNCT__ "MatFileSplit"
3032 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3033 {
3034   PetscErrorCode    ierr;
3035   PetscMPIInt       rank;
3036   PetscInt          m,N,i,rstart,nnz;
3037   size_t            len;
3038   const PetscInt    *indx;
3039   PetscViewer       out;
3040   char              *name;
3041   Mat               B;
3042   const PetscScalar *values;
3043 
3044   PetscFunctionBegin;
3045   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3046   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3047   /* Should this be the type of the diagonal block of A? */
3048   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3049   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3050   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3051   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3052   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3053   for (i=0;i<m;i++) {
3054     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3055     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3056     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3057   }
3058   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3059   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3060 
3061   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3062   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3063   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3064   sprintf(name,"%s.%d",outfile,rank);
3065   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3066   ierr = PetscFree(name);
3067   ierr = MatView(B,out);CHKERRQ(ierr);
3068   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3069   ierr = MatDestroy(B);CHKERRQ(ierr);
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
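/*
   MatDestroy_MPIAIJ_SeqsToMPI - destroy routine for matrices created by
   MatMerge_SeqsToMPI(); frees the Mat_Merge_SeqsToMPI support structure attached to
   the matrix before calling the standard MPIAIJ destroy routine.
*/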
3074 #undef __FUNCT__
3075 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3076 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3077 {
3078   PetscErrorCode       ierr;
3079   Mat_Merge_SeqsToMPI  *merge;
3080   PetscObjectContainer container;
3081 
3082   PetscFunctionBegin;
3083   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3084   if (container) {
3085     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3086     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3087     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3088     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3089     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3090     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3091     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3092     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3093     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3094     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3095     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3096     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3097 
3098     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
3099     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3100     ierr = PetscFree(merge);CHKERRQ(ierr);
3101   }
3102 
3103   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3104   PetscFunctionReturn(0);
3105 }
3106 
3107 #include "src/mat/utils/freespace.h"
3108 #include "petscbt.h"
3109 static PetscEvent logkey_seqstompinum = 0;
3110 #undef __FUNCT__
3111 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3112 /*@C
3113       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3114                  matrices from each processor
3115 
3116     Collective on MPI_Comm
3117 
3118    Input Parameters:
3119 +    comm - the communicator the parallel matrix will live on
3120 .    seqmat - the input sequential matrix (one per process)
3121 .    m - number of local rows (or PETSC_DECIDE)
3122 .    n - number of local columns (or PETSC_DECIDE)
3123 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3124 
3125    Output Parameter:
3126 .    mpimat - the parallel matrix generated
3127 
3128     Level: advanced
3129 
3130    Notes:
3131      The dimensions of the sequential matrix in each processor MUST be the same.
3132      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3133      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
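
   A minimal usage sketch (illustrative only; error checking omitted; seqmat is the
   assembled SeqAIJ matrix on each process):
.vb
     Mat mpimat;
     MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);
.ve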
3134 @*/
3135 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3136 {
3137   PetscErrorCode       ierr;
3138   MPI_Comm             comm=mpimat->comm;
3139   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3140   PetscMPIInt          size,rank,taga,*len_s;
3141   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3142   PetscInt             proc,m;
3143   PetscInt             **buf_ri,**buf_rj;
3144   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3145   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3146   MPI_Request          *s_waits,*r_waits;
3147   MPI_Status           *status;
3148   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3149   Mat_Merge_SeqsToMPI  *merge;
3150   PetscObjectContainer container;
3151 
3152   PetscFunctionBegin;
3153   if (!logkey_seqstompinum) {
3154     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3155   }
3156   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3157 
3158   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3159   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3160 
3161   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3162   if (container) {
3163     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3164   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix was not created by MatMerge_SeqsToMPISymbolic()");
3165   bi     = merge->bi;
3166   bj     = merge->bj;
3167   buf_ri = merge->buf_ri;
3168   buf_rj = merge->buf_rj;
3169 
3170   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3171   owners = merge->rowmap.range;
3172   len_s  = merge->len_s;
3173 
3174   /* send and recv matrix values */
3175   /*-----------------------------*/
3176   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3177   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3178 
3179   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3180   for (proc=0,k=0; proc<size; proc++){
3181     if (!len_s[proc]) continue;
3182     i = owners[proc];
3183     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3184     k++;
3185   }
3186 
3187   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3188   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3189   ierr = PetscFree(status);CHKERRQ(ierr);
3190 
3191   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3192   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3193 
3194   /* insert mat values of mpimat */
3195   /*----------------------------*/
3196   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3197   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3198   nextrow = buf_ri_k + merge->nrecv;
3199   nextai  = nextrow + merge->nrecv;
3200 
3201   for (k=0; k<merge->nrecv; k++){
3202     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3203     nrows = *(buf_ri_k[k]);
3204     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3205     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
3206   }
3207 
3208   /* set values of ba */
3209   m = merge->rowmap.n;
3210   for (i=0; i<m; i++) {
3211     arow = owners[rank] + i;
3212     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3213     bnzi = bi[i+1] - bi[i];
3214     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3215 
3216     /* add local non-zero vals of this proc's seqmat into ba */
3217     anzi = ai[arow+1] - ai[arow];
3218     aj   = a->j + ai[arow];
3219     aa   = a->a + ai[arow];
3220     nextaj = 0;
3221     for (j=0; nextaj<anzi; j++){
3222       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3223         ba_i[j] += aa[nextaj++];
3224       }
3225     }
3226 
3227     /* add received vals into ba */
3228     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3229       /* i-th row */
3230       if (i == *nextrow[k]) {
3231         anzi = *(nextai[k]+1) - *nextai[k];
3232         aj   = buf_rj[k] + *(nextai[k]);
3233         aa   = abuf_r[k] + *(nextai[k]);
3234         nextaj = 0;
3235         for (j=0; nextaj<anzi; j++){
3236           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3237             ba_i[j] += aa[nextaj++];
3238           }
3239         }
3240         nextrow[k]++; nextai[k]++;
3241       }
3242     }
3243     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3244   }
3245   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3246   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3247 
3248   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3249   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3250   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3251   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3252   PetscFunctionReturn(0);
3253 }
3254 
3255 static PetscEvent logkey_seqstompisym = 0;
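/*
   MatMerge_SeqsToMPISymbolic - symbolic phase of MatMerge_SeqsToMPI(): communicates
   the nonzero structure of the local sequential matrices, creates and preallocates
   the (not yet assembled) parallel matrix, and attaches a Mat_Merge_SeqsToMPI
   container so that MatMerge_SeqsToMPINumeric() can fill in the values, possibly
   repeatedly.
*/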
3256 #undef __FUNCT__
3257 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3258 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3259 {
3260   PetscErrorCode       ierr;
3261   Mat                  B_mpi;
3262   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3263   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3264   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3265   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3266   PetscInt             len,proc,*dnz,*onz;
3267   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3268   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3269   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3270   MPI_Status           *status;
3271   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3272   PetscBT              lnkbt;
3273   Mat_Merge_SeqsToMPI  *merge;
3274   PetscObjectContainer container;
3275 
3276   PetscFunctionBegin;
3277   if (!logkey_seqstompisym) {
3278     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3279   }
3280   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3281 
3282   /* make sure it is a PETSc comm */
3283   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3284   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3285   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3286 
3287   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3288   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3289 
3290   /* determine row ownership */
3291   /*---------------------------------------------------------*/
3292   merge->rowmap.n = m;
3293   merge->rowmap.N = M;
3294   merge->rowmap.bs = 1;
3295   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3296   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3297   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3298 
3299   m      = merge->rowmap.n;
3300   M      = merge->rowmap.N;
3301   owners = merge->rowmap.range;
3302 
3303   /* determine the number of messages to send, their lengths */
3304   /*---------------------------------------------------------*/
3305   len_s  = merge->len_s;
3306 
3307   len = 0;  /* length of buf_si[] */
3308   merge->nsend = 0;
3309   for (proc=0; proc<size; proc++){
3310     len_si[proc] = 0;
3311     if (proc == rank){
3312       len_s[proc] = 0;
3313     } else {
3314       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3315       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3316     }
3317     if (len_s[proc]) {
3318       merge->nsend++;
3319       nrows = 0;
3320       for (i=owners[proc]; i<owners[proc+1]; i++){
3321         if (ai[i+1] > ai[i]) nrows++;
3322       }
3323       len_si[proc] = 2*(nrows+1);
3324       len += len_si[proc];
3325     }
3326   }
3327 
3328   /* determine the number and length of messages to receive for ij-structure */
3329   /*-------------------------------------------------------------------------*/
3330   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3331   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3332 
3333   /* post the Irecv of j-structure */
3334   /*-------------------------------*/
3335   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3336   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3337 
3338   /* post the Isend of j-structure */
3339   /*--------------------------------*/
3340   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3341   sj_waits = si_waits + merge->nsend;
3342 
3343   for (proc=0, k=0; proc<size; proc++){
3344     if (!len_s[proc]) continue;
3345     i = owners[proc];
3346     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3347     k++;
3348   }
3349 
3350   /* receives and sends of j-structure are complete */
3351   /*------------------------------------------------*/
3352   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3353   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3354 
3355   /* send and recv i-structure */
3356   /*---------------------------*/
3357   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3358   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3359 
3360   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3361   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3362   for (proc=0,k=0; proc<size; proc++){
3363     if (!len_s[proc]) continue;
3364     /* form outgoing message for i-structure:
3365          buf_si[0]:                 nrows to be sent
3366                [1:nrows]:           row index (global)
3367                [nrows+1:2*nrows+1]: i-structure index
3368     */
3369     /*-------------------------------------------*/
3370     nrows = len_si[proc]/2 - 1;
3371     buf_si_i    = buf_si + nrows+1;
3372     buf_si[0]   = nrows;
3373     buf_si_i[0] = 0;
3374     nrows = 0;
3375     for (i=owners[proc]; i<owners[proc+1]; i++){
3376       anzi = ai[i+1] - ai[i];
3377       if (anzi) {
3378         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3379         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3380         nrows++;
3381       }
3382     }
3383     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3384     k++;
3385     buf_si += len_si[proc];
3386   }
3387 
3388   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3389   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3390 
3391   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3392   for (i=0; i<merge->nrecv; i++){
3393     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3394   }
3395 
3396   ierr = PetscFree(len_si);CHKERRQ(ierr);
3397   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3398   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3399   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3400   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3401   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3402   ierr = PetscFree(status);CHKERRQ(ierr);
3403 
3404   /* compute a local seq matrix in each processor */
3405   /*----------------------------------------------*/
3406   /* allocate bi array and free space for accumulating nonzero column info */
3407   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3408   bi[0] = 0;
3409 
3410   /* create and initialize a linked list */
3411   nlnk = N+1;
3412   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3413 
3414   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3415   len = 0;
3416   len  = ai[owners[rank+1]] - ai[owners[rank]];
3417   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3418   current_space = free_space;
3419 
3420   /* determine symbolic info for each local row */
3421   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3422   nextrow = buf_ri_k + merge->nrecv;
3423   nextai  = nextrow + merge->nrecv;
3424   for (k=0; k<merge->nrecv; k++){
3425     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3426     nrows = *buf_ri_k[k];
3427     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3428     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
3429   }
3430 
3431   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3432   len = 0;
3433   for (i=0;i<m;i++) {
3434     bnzi   = 0;
3435     /* add local non-zero cols of this proc's seqmat into lnk */
3436     arow   = owners[rank] + i;
3437     anzi   = ai[arow+1] - ai[arow];
3438     aj     = a->j + ai[arow];
3439     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3440     bnzi += nlnk;
3441     /* add received col data into lnk */
3442     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3443       if (i == *nextrow[k]) { /* i-th row */
3444         anzi = *(nextai[k]+1) - *nextai[k];
3445         aj   = buf_rj[k] + *nextai[k];
3446         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3447         bnzi += nlnk;
3448         nextrow[k]++; nextai[k]++;
3449       }
3450     }
3451     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3452 
3453     /* if free space is not available, make more free space */
3454     if (current_space->local_remaining<bnzi) {
3455       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3456       nspacedouble++;
3457     }
3458     /* copy data into free space, then initialize lnk */
3459     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3460     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3461 
3462     current_space->array           += bnzi;
3463     current_space->local_used      += bnzi;
3464     current_space->local_remaining -= bnzi;
3465 
3466     bi[i+1] = bi[i] + bnzi;
3467   }
3468 
3469   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3470 
3471   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3472   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3473   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3474 
3475   /* create symbolic parallel matrix B_mpi */
3476   /*---------------------------------------*/
3477   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3478   if (n==PETSC_DECIDE) {
3479     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3480   } else {
3481     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3482   }
3483   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3484   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3485   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3486 
3487   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3488   B_mpi->assembled     = PETSC_FALSE;
3489   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3490   merge->bi            = bi;
3491   merge->bj            = bj;
3492   merge->buf_ri        = buf_ri;
3493   merge->buf_rj        = buf_rj;
3494   merge->coi           = PETSC_NULL;
3495   merge->coj           = PETSC_NULL;
3496   merge->owners_co     = PETSC_NULL;
3497 
3498   /* attach the supporting struct to B_mpi for reuse */
3499   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3500   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3501   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3502   *mpimat = B_mpi;
3503 
3504   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3505   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3506   PetscFunctionReturn(0);
3507 }
3508 
3509 static PetscEvent logkey_seqstompi = 0;
3510 #undef __FUNCT__
3511 #define __FUNCT__ "MatMerge_SeqsToMPI"
3512 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3513 {
3514   PetscErrorCode   ierr;
3515 
3516   PetscFunctionBegin;
3517   if (!logkey_seqstompi) {
3518     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3519   }
3520   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3521   if (scall == MAT_INITIAL_MATRIX){
3522     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3523   }
3524   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3525   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3526   PetscFunctionReturn(0);
3527 }
3528 static PetscEvent logkey_getlocalmat = 0;
3529 #undef __FUNCT__
3530 #define __FUNCT__ "MatGetLocalMat"
3531 /*@C
3532      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of the parallel matrix A
3533 
3534     Not Collective
3535 
3536    Input Parameters:
3537 +    A - the matrix
3538 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3539 
3540    Output Parameter:
3541 .    A_loc - the local sequential matrix generated
3542 
3543     Level: developer
3544 
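   A minimal usage sketch (illustrative only; error checking omitted; A is assumed to
   be an assembled MPIAIJ matrix):
.vb
     Mat A_loc;
     MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... use A_loc ... */
     MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);  /* refresh the values after A has changed */
.ve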
3545 @*/
3546 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3547 {
3548   PetscErrorCode  ierr;
3549   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3550   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3551   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3552   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3553   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3554   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3555 
3556   PetscFunctionBegin;
3557   if (!logkey_getlocalmat) {
3558     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3559   }
3560   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3561   if (scall == MAT_INITIAL_MATRIX){
3562     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3563     ci[0] = 0;
3564     for (i=0; i<am; i++){
3565       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3566     }
3567     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3568     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3569     k = 0;
3570     for (i=0; i<am; i++) {
3571       ncols_o = bi[i+1] - bi[i];
3572       ncols_d = ai[i+1] - ai[i];
3573       /* off-diagonal portion of A */
3574       for (jo=0; jo<ncols_o; jo++) {
3575         col = cmap[*bj];
3576         if (col >= cstart) break;
3577         cj[k]   = col; bj++;
3578         ca[k++] = *ba++;
3579       }
3580       /* diagonal portion of A */
3581       for (j=0; j<ncols_d; j++) {
3582         cj[k]   = cstart + *aj++;
3583         ca[k++] = *aa++;
3584       }
3585       /* off-diagonal portion of A */
3586       for (j=jo; j<ncols_o; j++) {
3587         cj[k]   = cmap[*bj++];
3588         ca[k++] = *ba++;
3589       }
3590     }
3591     /* put together the new matrix */
3592     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3593     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3594     /* Since these are PETSc arrays, change flags to free them as necessary. */
3595     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3596     mat->free_a  = PETSC_TRUE;
3597     mat->free_ij = PETSC_TRUE;
3598     mat->nonew   = 0;
3599   } else if (scall == MAT_REUSE_MATRIX){
3600     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3601     ci = mat->i; cj = mat->j; ca = mat->a;
3602     for (i=0; i<am; i++) {
3603       /* off-diagonal portion of A */
3604       ncols_o = bi[i+1] - bi[i];
3605       for (jo=0; jo<ncols_o; jo++) {
3606         col = cmap[*bj];
3607         if (col >= cstart) break;
3608         *ca++ = *ba++; bj++;
3609       }
3610       /* diagonal portion of A */
3611       ncols_d = ai[i+1] - ai[i];
3612       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3613       /* off-diagonal portion of A */
3614       for (j=jo; j<ncols_o; j++) {
3615         *ca++ = *ba++; bj++;
3616       }
3617     }
3618   } else {
3619     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3620   }
3621 
3622   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3623   PetscFunctionReturn(0);
3624 }
3625 
3626 static PetscEvent logkey_getlocalmatcondensed = 0;
3627 #undef __FUNCT__
3628 #define __FUNCT__ "MatGetLocalMatCondensed"
3629 /*@C
3630      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and the NON-ZERO columns of the parallel matrix A
3631 
3632     Not Collective
3633 
3634    Input Parameters:
3635 +    A - the matrix
3636 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3637 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3638 
3639    Output Parameter:
3640 .    A_loc - the local sequential matrix generated
3641 
3642     Level: developer
3643 
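   A minimal usage sketch (illustrative only; error checking omitted; A is assumed to
   be an assembled MPIAIJ matrix; passing PETSC_NULL for row and col selects all local
   rows and the nonzero columns automatically):
.vb
     Mat A_loc;
     MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);
.ve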
3644 @*/
3645 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3646 {
3647   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3648   PetscErrorCode    ierr;
3649   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3650   IS                isrowa,iscola;
3651   Mat               *aloc;
3652 
3653   PetscFunctionBegin;
3654   if (!logkey_getlocalmatcondensed) {
3655     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3656   }
3657   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3658   if (!row){
3659     start = A->rmap.rstart; end = A->rmap.rend;
3660     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3661   } else {
3662     isrowa = *row;
3663   }
3664   if (!col){
3665     start = A->cmap.rstart;
3666     cmap  = a->garray;
3667     nzA   = a->A->cmap.n;
3668     nzB   = a->B->cmap.n;
3669     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3670     ncols = 0;
3671     for (i=0; i<nzB; i++) {
3672       if (cmap[i] < start) idx[ncols++] = cmap[i];
3673       else break;
3674     }
3675     imark = i;
3676     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3677     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3678     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3679     ierr = PetscFree(idx);CHKERRQ(ierr);
3680   } else {
3681     iscola = *col;
3682   }
3683   if (scall != MAT_INITIAL_MATRIX){
3684     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3685     aloc[0] = *A_loc;
3686   }
3687   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3688   *A_loc = aloc[0];
3689   ierr = PetscFree(aloc);CHKERRQ(ierr);
3690   if (!row){
3691     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3692   }
3693   if (!col){
3694     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3695   }
3696   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3697   PetscFunctionReturn(0);
3698 }
3699 
3700 static PetscEvent logkey_GetBrowsOfAcols = 0;
3701 #undef __FUNCT__
3702 #define __FUNCT__ "MatGetBrowsOfAcols"
3703 /*@C
3704     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3705 
3706     Collective on Mat
3707 
3708    Input Parameters:
3709 +    A,B - the matrices in mpiaij format
3710 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3711 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3712 
3713    Output Parameter:
3714 +    rowb, colb - index sets of rows and columns of B to extract
3715 .    brstart - row index of B_seq at which B's local rows start (the next B->rmap.n rows of B_seq are taken from B's local rows)
3716 -    B_seq - the sequential matrix generated
3717 
3718     Level: developer
3719 
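   A minimal usage sketch (illustrative only; error checking omitted; A and B are
   assumed to be assembled MPIAIJ matrices; the returned rowb, colb and B_seq can be
   passed back with MAT_REUSE_MATRIX):
.vb
     IS       rowb,colb;
     PetscInt brstart;
     Mat      B_seq;
     MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);
.ve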
3720 @*/
3721 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3722 {
3723   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3724   PetscErrorCode    ierr;
3725   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3726   IS                isrowb,iscolb;
3727   Mat               *bseq;
3728 
3729   PetscFunctionBegin;
3730   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3731     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3732   }
3733   if (!logkey_GetBrowsOfAcols) {
3734     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3735   }
3736   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3737 
3738   if (scall == MAT_INITIAL_MATRIX){
3739     start = A->cmap.rstart;
3740     cmap  = a->garray;
3741     nzA   = a->A->cmap.n;
3742     nzB   = a->B->cmap.n;
3743     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3744     ncols = 0;
3745     for (i=0; i<nzB; i++) {  /* row < local row index */
3746       if (cmap[i] < start) idx[ncols++] = cmap[i];
3747       else break;
3748     }
3749     imark = i;
3750     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3751     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3752     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3753     ierr = PetscFree(idx);CHKERRQ(ierr);
3754     *brstart = imark;
3755     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
3756   } else {
3757     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3758     isrowb = *rowb; iscolb = *colb;
3759     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3760     bseq[0] = *B_seq;
3761   }
3762   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3763   *B_seq = bseq[0];
3764   ierr = PetscFree(bseq);CHKERRQ(ierr);
3765   if (!rowb){
3766     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3767   } else {
3768     *rowb = isrowb;
3769   }
3770   if (!colb){
3771     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3772   } else {
3773     *colb = iscolb;
3774   }
3775   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3776   PetscFunctionReturn(0);
3777 }
3778 
3779 static PetscEvent logkey_GetBrowsOfAocols = 0;
3780 #undef __FUNCT__
3781 #define __FUNCT__ "MatGetBrowsOfAoCols"
3782 /*@C
3783     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3784     of the OFF-DIAGONAL portion of local A
3785 
3786     Collective on Mat
3787 
3788    Input Parameters:
3789 +    A,B - the matrices in mpiaij format
3790 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3791 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3792 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3793 
3794    Output Parameter:
3795 .    B_oth - the sequential matrix generated
3796 
3797     Level: developer
3798 
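   A minimal usage sketch (illustrative only; error checking omitted; A and B are
   assumed to be assembled MPIAIJ matrices; startsj and bufa are kept so that the
   values of B_oth can be refreshed later with MAT_REUSE_MATRIX):
.vb
     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         B_oth;
     MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);
.ve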
3799 @*/
3800 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3801 {
3802   VecScatter_MPI_General *gen_to,*gen_from;
3803   PetscErrorCode         ierr;
3804   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
3805   Mat_SeqAIJ             *b_oth;
3806   VecScatter             ctx=a->Mvctx;
3807   MPI_Comm               comm=ctx->comm;
3808   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3809   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
3810   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3811   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3812   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
3813   MPI_Status             *sstatus,rstatus;
3814   PetscInt               *cols,sbs,rbs;
3815   PetscScalar            *vals;
3816 
3817   PetscFunctionBegin;
3818   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3819     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3820   }
3821   if (!logkey_GetBrowsOfAocols) {
3822     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3823   }
3824   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3825   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3826 
3827   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3828   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3829   rvalues  = gen_from->values; /* holds the length of receiving row */
3830   svalues  = gen_to->values;   /* holds the length of sending row */
3831   nrecvs   = gen_from->n;
3832   nsends   = gen_to->n;
3833 
3834   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
3835   srow     = gen_to->indices;   /* local row index to be sent */
3836   sstarts  = gen_to->starts;
3837   sprocs   = gen_to->procs;
3838   sstatus  = gen_to->sstatus;
3839   sbs      = gen_to->bs;
3840   rstarts  = gen_from->starts;
3841   rprocs   = gen_from->procs;
3842   rbs      = gen_from->bs;
3843 
3844   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3845   if (scall == MAT_INITIAL_MATRIX){
3846     /* i-array */
3847     /*---------*/
3848     /*  post receives */
3849     for (i=0; i<nrecvs; i++){
3850       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
3851       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
3852       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3853     }
3854 
3855     /* pack the outgoing message */
3856     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3857     rstartsj = sstartsj + nsends +1;
3858     sstartsj[0] = 0;  rstartsj[0] = 0;
3859     len = 0; /* total length of j or a array to be sent */
3860     k = 0;
3861     for (i=0; i<nsends; i++){
3862       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
3863       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
3864       for (j=0; j<nrows; j++) {
3865         row = srow[k] + B->rmap.range[rank]; /* global row idx */
3866         for (l=0; l<sbs; l++){
3867           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3868           rowlen[j*sbs+l] = ncols;
3869           len += ncols;
3870           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3871         }
3872         k++;
3873       }
3874       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3875       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3876     }
3877     /* recvs and sends of i-array are completed */
3878     i = nrecvs;
3879     while (i--) {
3880       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3881     }
3882     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3883 
3884     /* allocate buffers for sending j and a arrays */
3885     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3886     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3887 
3888     /* create i-array of B_oth */
3889     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3890     b_othi[0] = 0;
3891     len = 0; /* total length of j or a array to be received */
3892     k = 0;
3893     for (i=0; i<nrecvs; i++){
3894       rowlen = (PetscInt*)rvalues + rstarts[i];
3895       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
3896       for (j=0; j<nrows; j++) {
3897         b_othi[k+1] = b_othi[k] + rowlen[j];
3898         len += rowlen[j]; k++;
3899       }
3900       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3901     }
3902 
3903     /* allocate space for j and a arrays of B_oth */
3904     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3905     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3906 
3907     /* j-array */
3908     /*---------*/
3909     /*  post receives of j-array */
3910     for (i=0; i<nrecvs; i++){
3911       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3912       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3913     }
3914 
3915     /* pack the outgoing message j-array */
3916     k = 0;
3917     for (i=0; i<nsends; i++){
3918       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
3919       bufJ = bufj+sstartsj[i];
3920       for (j=0; j<nrows; j++) {
3921         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3922         for (ll=0; ll<sbs; ll++){
3923           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3924           for (l=0; l<ncols; l++){
3925             *bufJ++ = cols[l];
3926           }
3927           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3928         }
3929       }
3930       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3931     }
3932 
3933     /* recvs and sends of j-array are completed */
3934     i = nrecvs;
3935     while (i--) {
3936       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3937     }
3938     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3939   } else if (scall == MAT_REUSE_MATRIX){
3940     sstartsj = *startsj;
3941     rstartsj = sstartsj + nsends +1;
3942     bufa     = *bufa_ptr;
3943     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3944     b_otha   = b_oth->a;
3945   } else {
3946     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3947   }
3948 
3949   /* a-array */
3950   /*---------*/
3951   /*  post receives of a-array */
3952   for (i=0; i<nrecvs; i++){
3953     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3954     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3955   }
3956 
3957   /* pack the outgoing message a-array */
3958   k = 0;
3959   for (i=0; i<nsends; i++){
3960     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
3961     bufA = bufa+sstartsj[i];
3962     for (j=0; j<nrows; j++) {
3963       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3964       for (ll=0; ll<sbs; ll++){
3965         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3966         for (l=0; l<ncols; l++){
3967           *bufA++ = vals[l];
3968         }
3969         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3970       }
3971     }
3972     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3973   }
3974   /* recvs and sends of a-array are completed */
3975   i = nrecvs;
3976   while (i--) {
3977     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3978   }
3979   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3980   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
3981 
3982   if (scall == MAT_INITIAL_MATRIX){
3983     /* put together the new matrix */
3984     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3985 
3986     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3987     /* Since these are PETSc arrays, change flags to free them as necessary. */
3988     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
3989     b_oth->free_a  = PETSC_TRUE;
3990     b_oth->free_ij = PETSC_TRUE;
3991     b_oth->nonew   = 0;
3992 
3993     ierr = PetscFree(bufj);CHKERRQ(ierr);
3994     if (!startsj || !bufa_ptr){
3995       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3996       ierr = PetscFree(bufa);CHKERRQ(ierr);
3997     } else {
3998       *startsj  = sstartsj;
3999       *bufa_ptr = bufa;
4000     }
4001   }
4002   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4003 
4004   PetscFunctionReturn(0);
4005 }
4006 
4007 #undef __FUNCT__
4008 #define __FUNCT__ "MatGetCommunicationStructs"
4009 /*@C
4010   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4011 
4012   Not Collective
4013 
4014   Input Parameters:
4015 . A - The matrix in mpiaij format
4016 
4017   Output Parameter:
4018 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4019 . colmap - A map from global column index to local index into lvec
4020 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4021 
4022   Level: developer
4023 
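  A minimal usage sketch (illustrative only; error checking omitted; A is assumed to
  be an MPIAIJ matrix; note that the type of colmap depends on whether PETSc was
  configured with PETSC_USE_CTABLE):
.vb
 #if defined (PETSC_USE_CTABLE)
     PetscTable colmap;
 #else
     PetscInt   *colmap;
 #endif
     Vec        lvec;
     VecScatter Mvctx;
     MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);
.ve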
4024 @*/
4025 #if defined (PETSC_USE_CTABLE)
4026 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4027 #else
4028 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4029 #endif
4030 {
4031   Mat_MPIAIJ *a;
4032 
4033   PetscFunctionBegin;
4034   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4035   PetscValidPointer(lvec, 2)
4036   PetscValidPointer(colmap, 3)
4037   PetscValidPointer(multScatter, 4)
4038   a = (Mat_MPIAIJ *) A->data;
4039   if (lvec) *lvec = a->lvec;
4040   if (colmap) *colmap = a->colmap;
4041   if (multScatter) *multScatter = a->Mvctx;
4042   PetscFunctionReturn(0);
4043 }
4044 
4045 EXTERN_C_BEGIN
4046 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4047 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4048 EXTERN_C_END
4049 
4050 /*MC
4051    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4052 
4053    Options Database Keys:
4054 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4055 
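   A matrix of this type can also be created explicitly in the code; a minimal sketch
   (error checking omitted; the sizes and preallocation arguments are placeholders):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz);
.ve
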
4056   Level: beginner
4057 
4058 .seealso: MatCreateMPIAIJ
4059 M*/
4060 
4061 EXTERN_C_BEGIN
4062 #undef __FUNCT__
4063 #define __FUNCT__ "MatCreate_MPIAIJ"
4064 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4065 {
4066   Mat_MPIAIJ     *b;
4067   PetscErrorCode ierr;
4068   PetscMPIInt    size;
4069 
4070   PetscFunctionBegin;
4071   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4072 
4073   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
4074   B->data         = (void*)b;
4075   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4076   B->factor       = 0;
4077   B->rmap.bs      = 1;
4078   B->assembled    = PETSC_FALSE;
4079   B->mapping      = 0;
4080 
4081   B->insertmode      = NOT_SET_VALUES;
4082   b->size            = size;
4083   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4084 
4085   /* build cache for off array entries formed */
4086   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4087   b->donotstash  = PETSC_FALSE;
4088   b->colmap      = 0;
4089   b->garray      = 0;
4090   b->roworiented = PETSC_TRUE;
4091 
4092   /* stuff used for matrix vector multiply */
4093   b->lvec      = PETSC_NULL;
4094   b->Mvctx     = PETSC_NULL;
4095 
4096   /* stuff for MatGetRow() */
4097   b->rowindices   = 0;
4098   b->rowvalues    = 0;
4099   b->getrowactive = PETSC_FALSE;
4100 
4101 
4102   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4103                                      "MatStoreValues_MPIAIJ",
4104                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4105   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4106                                      "MatRetrieveValues_MPIAIJ",
4107                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4108   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4109 				     "MatGetDiagonalBlock_MPIAIJ",
4110                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4111   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4112 				     "MatIsTranspose_MPIAIJ",
4113 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4114   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4115 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4116 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4117   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4118 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4119 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4120   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4121 				     "MatDiagonalScaleLocal_MPIAIJ",
4122 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4123   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4124                                      "MatConvert_MPIAIJ_MPICSRPERM",
4125                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4126   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4127                                      "MatConvert_MPIAIJ_MPICRL",
4128                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4129   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4130   PetscFunctionReturn(0);
4131 }
4132 EXTERN_C_END
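
/*
   The PetscObjectComposeFunctionDynamic() calls above attach named function pointers
   to the matrix; interface routines such as MatStoreValues() later dispatch to them
   by name.  A sketch of such a lookup (the variable name f is illustrative):

     PetscErrorCode (*f)(Mat);
     ierr = PetscObjectQueryFunction((PetscObject)mat,"MatStoreValues_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) { ierr = (*f)(mat);CHKERRQ(ierr); }
*/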
4133 
4134 #undef __FUNCT__
4135 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4136 /*@C
4137      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4138          and "off-diagonal" part of the matrix in CSR format.
4139 
4140    Collective on MPI_Comm
4141 
4142    Input Parameters:
4143 +  comm - MPI communicator
4144 .  m - number of local rows (cannot be PETSC_DECIDE)
4145 .  n - this value should be the same as the local size used in creating the
4146        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4147        calculated if N is given). For square matrices n is almost always m.
4148 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4149 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4150 .   i - row indices for "diagonal" portion of matrix
4151 .   j - column indices for "diagonal" portion of matrix (local to the diagonal block)
4152 .   a - matrix values for "diagonal" portion of matrix
4153 .   oi - row indices for "off-diagonal" portion of matrix
4154 .   oj - column indices for "off-diagonal" portion of matrix (global column indices)
4155 -   oa - matrix values for "off-diagonal" portion of matrix
4156 
4157    Output Parameter:
4158 .   mat - the matrix
4159 
4160    Level: advanced
4161 
4162    Notes:
4163        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc; they must remain valid for the life of the matrix
4164 
4165        The i, j, oi, and oj indices are zero based
4166 
4167        See MatCreateMPIAIJ() for the definition of the "diagonal" and "off-diagonal" portion of the matrix
4168 
4169 
4170 .keywords: matrix, aij, compressed row, sparse, parallel
4171 
4172 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4173           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4174 @*/
4175 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4176 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4177 {
4178   PetscErrorCode ierr;
4179   Mat_MPIAIJ     *maij;
4180 
4181   PetscFunctionBegin;
4182   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4183   if (i[0]) {
4184     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4185   }
4186   if (oi[0]) {
4187     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4188   }
4189   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4190   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4191   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4192   maij = (Mat_MPIAIJ*) (*mat)->data;
4193   maij->donotstash     = PETSC_TRUE;
4194   (*mat)->preallocated = PETSC_TRUE;
4195 
4196   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4197   ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr);
4198   ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr);
4199 
4200   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4201   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4202 
4203   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4204   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4205   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4206   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4207 
4208   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4209   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4210   PetscFunctionReturn(0);
4211 }
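
/*
   A sketch of calling MatCreateMPIAIJWithSplitArrays() on two processes for the
   global 2x2 matrix

        ( 1  2 )
        ( 3  4 )

   with one row and one column owned by each process.  As the MatCreateSeqAIJWithArrays()
   calls above imply, j holds column indices local to the diagonal block while oj holds
   global column indices; none of the arrays are copied, so they must remain valid for
   the life of the matrix.  (Illustrative only; the variable names are not part of this file.)

     rank 0:  i[]  = {0,1};  j[]  = {0};  a[]  = {1.0};
              oi[] = {0,1};  oj[] = {1};  oa[] = {2.0};
     rank 1:  i[]  = {0,1};  j[]  = {0};  a[]  = {4.0};
              oi[] = {0,1};  oj[] = {0};  oa[] = {3.0};

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
*/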
4212 
4213 /*
4214     Special version of MatSetValues_MPIAIJ() for direct calls from Fortran
4215 */
4216 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4217 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4218 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4219 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4220 #endif
4221 
4222 /* Change these macros so they can be used in a void function; since a void function cannot return an error code, errors abort instead of returning */
4223 #undef CHKERRQ
4224 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4225 #undef SETERRQ2
4226 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4227 #undef SETERRQ
4228 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4229 
4230 EXTERN_C_BEGIN
4231 #undef __FUNCT__
4232 #define __FUNCT__ "matsetvaluesmpiaij_"
4233 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4234 {
4235   Mat            mat = *mmat;
4236   PetscInt       m = *mm, n = *mn;
4237   InsertMode     addv = *maddv;
4238   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4239   PetscScalar    value;
4240   PetscErrorCode ierr;
4241 
4242   MatPreallocated(mat);
4243   if (mat->insertmode == NOT_SET_VALUES) {
4244     mat->insertmode = addv;
4245   }
4246 #if defined(PETSC_USE_DEBUG)
4247   else if (mat->insertmode != addv) {
4248     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4249   }
4250 #endif
4251   {
4252   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4253   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4254   PetscTruth     roworiented = aij->roworiented;
4255 
4256   /* Some variables required by the MatSetValues_SeqAIJ_[AB]_Private() macros */
4257   Mat            A = aij->A;
4258   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4259   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4260   PetscScalar    *aa = a->a;
4261   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4262   Mat            B = aij->B;
4263   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4264   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4265   PetscScalar    *ba = b->a;
4266 
4267   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4268   PetscInt       nonew = a->nonew;
4269   PetscScalar    *ap1,*ap2;
4270 
4271   PetscFunctionBegin;
4272   for (i=0; i<m; i++) {
4273     if (im[i] < 0) continue;
4274 #if defined(PETSC_USE_DEBUG)
4275     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4276 #endif
4277     if (im[i] >= rstart && im[i] < rend) {
4278       row      = im[i] - rstart;
4279       lastcol1 = -1;
4280       rp1      = aj + ai[row];
4281       ap1      = aa + ai[row];
4282       rmax1    = aimax[row];
4283       nrow1    = ailen[row];
4284       low1     = 0;
4285       high1    = nrow1;
4286       lastcol2 = -1;
4287       rp2      = bj + bi[row];
4288       ap2      = ba + bi[row];
4289       rmax2    = bimax[row];
4290       nrow2    = bilen[row];
4291       low2     = 0;
4292       high2    = nrow2;
4293 
4294       for (j=0; j<n; j++) {
4295         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4296         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4297         if (in[j] >= cstart && in[j] < cend){
4298           col = in[j] - cstart;
4299           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4300         } else if (in[j] < 0) continue;
4301 #if defined(PETSC_USE_DEBUG)
4302         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4303 #endif
4304         else {
4305           if (mat->was_assembled) {
4306             if (!aij->colmap) {
4307               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4308             }
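            /*
               Both forms of colmap store local column indices shifted up by one, so a
               lookup result of zero means "global column not present in the off-diagonal
               part"; the -1 (or col--) below undoes that shift, leaving col < 0 when the
               column was not found.
            */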
4309 #if defined (PETSC_USE_CTABLE)
4310             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4311 	    col--;
4312 #else
4313             col = aij->colmap[in[j]] - 1;
4314 #endif
4315             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4316               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4317               col =  in[j];
4318               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4319               B = aij->B;
4320               b = (Mat_SeqAIJ*)B->data;
4321               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4322               rp2      = bj + bi[row];
4323               ap2      = ba + bi[row];
4324               rmax2    = bimax[row];
4325               nrow2    = bilen[row];
4326               low2     = 0;
4327               high2    = nrow2;
4328               bm       = aij->B->rmap.n;
4329               ba = b->a;
4330             }
4331           } else col = in[j];
4332           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4333         }
4334       }
4335     } else {
4336       if (!aij->donotstash) {
4337         if (roworiented) {
4338           if (ignorezeroentries && v[i*n] == 0.0) continue;
4339           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4340         } else {
4341           if (ignorezeroentries && v[i] == 0.0) continue;
4342           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4343         }
4344       }
4345     }
4346   }}
4347   PetscFunctionReturnVoid();
4348 }
4349 EXTERN_C_END
4350 
4351