1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it, it is not scalable (each processor
11 has an order-N integer array) but is fast to access.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
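/*
   For illustration, a minimal sketch (variable names hypothetical) of how the
   colmap built above is later consulted to translate a global column number
   into a local column index of the off-diagonal block B; compare the lookups
   in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below.

     PetscInt gcol = ...;    a global column number not owned by this process
     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                 keys and values are stored shifted up by one
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) { the column has no entry in the off-diagonal part B }
*/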
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
56       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81     for (_i=low2; _i<high2; _i++) {		\
82       if (rp2[_i] > col) break;			\
83       if (rp2[_i] == col) {			      \
84 	if (addv == ADD_VALUES) ap2[_i] += value;     \
85 	else                    ap2[_i] = value;      \
86 	goto b_noinsert;			      \
87       }						      \
88     }							      \
89     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
90     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
91     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93     N = nrow2++ - 1; b->nz++; high2++;					\
94     /* shift up all the later entries in this row */			\
95     for (ii=N; ii>=_i; ii--) {						\
96       rp2[ii+1] = rp2[ii];						\
97       ap2[ii+1] = ap2[ii];						\
98     }									\
99     rp2[_i] = col;							\
100     ap2[_i] = value;							\
101     b_noinsert: ;								\
102     bilen[row] = nrow2;							\
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
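/*
   For illustration, a sketch of the ordering MatSetValuesRow_MPIAIJ() above
   assumes for the input array v[]: all values of one locally owned row, sorted
   by global column number, so that

     v = [ B entries with global column < rstart | A (diagonal block) entries | remaining B entries ]

   With l entries to the left of the diagonal block and nd = a->i[row+1]-a->i[row]
   diagonal-block entries, v[0..l-1] and v[l+nd..] are copied into B and
   v[l..l+nd-1] into A, exactly as the three PetscMemcpy() calls above do.
*/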
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = a->ignorezeroentries;
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
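/*
   For illustration, a minimal caller-side sketch (row and column numbers
   hypothetical) of the code path above: entries for locally owned rows are
   routed directly into the diagonal block A or the off-diagonal block B, while
   entries for rows owned by other processes are stashed until assembly.

     PetscInt    row     = rstart;             a row this process owns
     PetscInt    cols[2] = {rstart, rstart+1}; global columns; those in [cstart,cend) land in A, the rest in B
     PetscScalar vals[2] = {2.0, -1.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/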
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col --;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
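/*
   For illustration (indices hypothetical): MatGetValues() on an MPIAIJ matrix
   may only request rows owned by the calling process; the columns may be any
   global columns.

     PetscInt    row = rstart, col = 0;
     PetscScalar val;
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);

   Requesting a row outside [rstart,rend) triggers the PETSC_ERR_SUP error above;
   entries not stored in either A or B are returned as 0.0.
*/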
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
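/*
   For illustration of the consistency check above, a sketch relying on the
   historical enum values INSERT_VALUES == 1 and ADD_VALUES == 2: the bitwise-or
   reduction over all ranks gives

     INSERT_VALUES | ... | INSERT_VALUES == INSERT_VALUES
     ADD_VALUES    | ... | ADD_VALUES    == ADD_VALUES
     mixed modes                         == (ADD_VALUES|INSERT_VALUES)

   so a mix of inserting and adding processes is detected with a single
   MPI_Allreduce().
*/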
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312 {
313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
315   PetscErrorCode ierr;
316   PetscMPIInt    n;
317   PetscInt       i,j,rstart,ncols,flg;
318   PetscInt       *row,*col;
319   PetscTruth     other_disassembled;
320   PetscScalar    *val;
321   InsertMode     addv = mat->insertmode;
322 
323   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324   PetscFunctionBegin;
325   if (!aij->donotstash) {
326     while (1) {
327       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
328       if (!flg) break;
329 
330       for (i=0; i<n;) {
331         /* Now identify the consecutive vals belonging to the same row */
332         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
333         if (j < n) ncols = j-i;
334         else       ncols = n-i;
335         /* Now assemble all these values with a single function call */
336         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
337         i = j;
338       }
339     }
340     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
341   }
342   a->compressedrow.use     = PETSC_FALSE;
343   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
345 
346   /* determine if any processor has disassembled; if so, we must
347      also disassemble ourselves, in order that we may reassemble. */
348   /*
349      if the nonzero structure of submatrix B cannot change then we know that
350      no processor disassembled, and thus we can skip this step
351   */
352   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
353     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
354     if (mat->was_assembled && !other_disassembled) {
355       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
356     }
357   }
358   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
359     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
360   }
361   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
362   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
363   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
364   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
365 
366   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
367   aij->rowvalues = 0;
368 
369   /* used by MatAXPY() */
370   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
371   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
372 
373   PetscFunctionReturn(0);
374 }
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
379 {
380   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
381   PetscErrorCode ierr;
382 
383   PetscFunctionBegin;
384   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
385   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatZeroRows_MPIAIJ"
391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
392 {
393   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
394   PetscErrorCode ierr;
395   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
396   PetscInt       i,*owners = A->rmap.range;
397   PetscInt       *nprocs,j,idx,nsends,row;
398   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
399   PetscInt       *rvalues,count,base,slen,*source;
400   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
401   MPI_Comm       comm = ((PetscObject)A)->comm;
402   MPI_Request    *send_waits,*recv_waits;
403   MPI_Status     recv_status,*send_status;
404 #if defined(PETSC_DEBUG)
405   PetscTruth     found = PETSC_FALSE;
406 #endif
407 
408   PetscFunctionBegin;
409   /*  first count number of contributors to each processor */
410   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
411   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
412   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
413   j = 0;
414   for (i=0; i<N; i++) {
415     if (lastidx > (idx = rows[i])) j = 0;
416     lastidx = idx;
417     for (; j<size; j++) {
418       if (idx >= owners[j] && idx < owners[j+1]) {
419         nprocs[2*j]++;
420         nprocs[2*j+1] = 1;
421         owner[i] = j;
422 #if defined(PETSC_DEBUG)
423         found = PETSC_TRUE;
424 #endif
425         break;
426       }
427     }
428 #if defined(PETSC_DEBUG)
429     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
430     found = PETSC_FALSE;
431 #endif
432   }
433   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434 
435   /* inform other processors of number of messages and max length*/
436   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
437 
438   /* post receives:   */
439   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
440   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
441   for (i=0; i<nrecvs; i++) {
442     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
443   }
444 
445   /* do sends:
446       1) starts[i] gives the starting index in svalues for stuff going to
447          the ith processor
448   */
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
450   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
451   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
452   starts[0] = 0;
453   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
454   for (i=0; i<N; i++) {
455     svalues[starts[owner[i]]++] = rows[i];
456   }
457 
458   starts[0] = 0;
459   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
460   count = 0;
461   for (i=0; i<size; i++) {
462     if (nprocs[2*i+1]) {
463       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
464     }
465   }
466   ierr = PetscFree(starts);CHKERRQ(ierr);
467 
468   base = owners[rank];
469 
470   /*  wait on receives */
471   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
472   source = lens + nrecvs;
473   count  = nrecvs; slen = 0;
474   while (count) {
475     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
476     /* unpack receives into our local space */
477     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
478     source[imdex]  = recv_status.MPI_SOURCE;
479     lens[imdex]    = n;
480     slen          += n;
481     count--;
482   }
483   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
484 
485   /* move the data into the send scatter */
486   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
487   count = 0;
488   for (i=0; i<nrecvs; i++) {
489     values = rvalues + i*nmax;
490     for (j=0; j<lens[i]; j++) {
491       lrows[count++] = values[j] - base;
492     }
493   }
494   ierr = PetscFree(rvalues);CHKERRQ(ierr);
495   ierr = PetscFree(lens);CHKERRQ(ierr);
496   ierr = PetscFree(owner);CHKERRQ(ierr);
497   ierr = PetscFree(nprocs);CHKERRQ(ierr);
498 
499   /* actually zap the local rows */
500   /*
501         Zero the required rows. If the "diagonal block" of the matrix
502      is square and the user wishes to set the diagonal we use separate
503      code so that MatSetValues() is not called for each diagonal entry, which
504      would allocate new memory, incur many mallocs, and slow things down.
505 
506        Contributed by: Matthew Knepley
507   */
508   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
509   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
510   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
511     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
512   } else if (diag != 0.0) {
513     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
514     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
515       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
516 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
517     }
518     for (i = 0; i < slen; i++) {
519       row  = lrows[i] + rstart;
520       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
521     }
522     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
524   } else {
525     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
526   }
527   ierr = PetscFree(lrows);CHKERRQ(ierr);
528 
529   /* wait on sends */
530   if (nsends) {
531     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
532     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
533     ierr = PetscFree(send_status);CHKERRQ(ierr);
534   }
535   ierr = PetscFree(send_waits);CHKERRQ(ierr);
536   ierr = PetscFree(svalues);CHKERRQ(ierr);
537 
538   PetscFunctionReturn(0);
539 }
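/*
   For illustration, a caller-side sketch (row numbers hypothetical): each
   process may pass arbitrary global rows; the routine above ships each row
   index to its owner and then zeroes the local rows, optionally placing diag
   on the diagonal.

     PetscInt    rows[2] = {0, 5};   global row numbers, need not be locally owned
     PetscScalar diag    = 1.0;
     ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
*/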
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMult_MPIAIJ"
543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
544 {
545   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
546   PetscErrorCode ierr;
547   PetscInt       nt;
548 
549   PetscFunctionBegin;
550   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
551   if (nt != A->cmap.n) {
552     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
553   }
554   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
555   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
556   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
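/*
   For illustration, the splitting used by MatMult_MPIAIJ() above written as a
   worked equation, where x_ghost denotes the scattered copy of the needed
   off-process entries of x held in a->lvec:

     y = A_diag * x_local + B_offdiag * x_ghost

   The scatter filling x_ghost is begun before the local A_diag product and
   completed after it, so the communication overlaps the local computation.
*/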
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultAdd_MPIAIJ"
563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567 
568   PetscFunctionBegin;
569   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
570   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
571   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
572   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
579 {
580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
581   PetscErrorCode ierr;
582   PetscTruth     merged;
583 
584   PetscFunctionBegin;
585   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
586   /* do nondiagonal part */
587   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
588   if (!merged) {
589     /* send it on its way */
590     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
591     /* do local part */
592     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
593     /* receive remote parts: note this assumes the values are not actually */
594   /* added into yy until the VecScatterEnd() on the next line */
595     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
596   } else {
597     /* do local part */
598     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
599     /* send it on its way */
600     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
601     /* the values were actually received in the Begin() but we still need to call this no-op */
602     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
611 {
612   MPI_Comm       comm;
613   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
614   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
615   IS             Me,Notme;
616   PetscErrorCode ierr;
617   PetscInt       M,N,first,last,*notme,i;
618   PetscMPIInt    size;
619 
620   PetscFunctionBegin;
621 
622   /* Easy test: symmetric diagonal block */
623   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
624   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
625   if (!*f) PetscFunctionReturn(0);
626   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
628   if (size == 1) PetscFunctionReturn(0);
629 
630   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
631   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
632   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
633   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
634   for (i=0; i<first; i++) notme[i] = i;
635   for (i=last; i<M; i++) notme[i-last+first] = i;
636   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
637   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
638   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
639   Aoff = Aoffs[0];
640   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
641   Boff = Boffs[0];
642   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
644   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
645   ierr = ISDestroy(Me);CHKERRQ(ierr);
646   ierr = ISDestroy(Notme);CHKERRQ(ierr);
647 
648   PetscFunctionReturn(0);
649 }
650 EXTERN_C_END
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
655 {
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657   PetscErrorCode ierr;
658 
659   PetscFunctionBegin;
660   /* do nondiagonal part */
661   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
662   /* send it on its way */
663   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
664   /* do local part */
665   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
666   /* receive remote parts */
667   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672   This only works correctly for square matrices where the subblock A->A is the
673    diagonal block
674 */
675 #undef __FUNCT__
676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
678 {
679   PetscErrorCode ierr;
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681 
682   PetscFunctionBegin;
683   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
684   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
685     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
686   }
687   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatScale_MPIAIJ"
693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
694 {
695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
700   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatDestroy_MPIAIJ"
706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
707 {
708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
709   PetscErrorCode ierr;
710 
711   PetscFunctionBegin;
712 #if defined(PETSC_USE_LOG)
713   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
714 #endif
715   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
717   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
718 #if defined (PETSC_USE_CTABLE)
719   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
720 #else
721   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
722 #endif
723   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
724   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
725   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
726   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
727   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
728   ierr = PetscFree(aij);CHKERRQ(ierr);
729 
730   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
737   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatView_MPIAIJ_Binary"
743 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
744 {
745   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
746   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
747   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
750   int               fd;
751   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
752   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
753   PetscScalar       *column_values;
754 
755   PetscFunctionBegin;
756   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
757   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
758   nz   = A->nz + B->nz;
759   if (!rank) {
760     header[0] = MAT_FILE_COOKIE;
761     header[1] = mat->rmap.N;
762     header[2] = mat->cmap.N;
763     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
764     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
765     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
766     /* get largest number of rows any processor has */
767     rlen = mat->rmap.n;
768     range = mat->rmap.range;
769     for (i=1; i<size; i++) {
770       rlen = PetscMax(rlen,range[i+1] - range[i]);
771     }
772   } else {
773     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
774     rlen = mat->rmap.n;
775   }
776 
777   /* load up the local row counts */
778   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
779   for (i=0; i<mat->rmap.n; i++) {
780     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
781   }
782 
783   /* store the row lengths to the file */
784   if (!rank) {
785     MPI_Status status;
786     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
787     for (i=1; i<size; i++) {
788       rlen = range[i+1] - range[i];
789       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
790       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
791     }
792   } else {
793     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
794   }
795   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
796 
797   /* load up the local column indices */
798   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
799   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
800   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
801   cnt  = 0;
802   for (i=0; i<mat->rmap.n; i++) {
803     for (j=B->i[i]; j<B->i[i+1]; j++) {
804       if ( (col = garray[B->j[j]]) > cstart) break;
805       column_indices[cnt++] = col;
806     }
807     for (k=A->i[i]; k<A->i[i+1]; k++) {
808       column_indices[cnt++] = A->j[k] + cstart;
809     }
810     for (; j<B->i[i+1]; j++) {
811       column_indices[cnt++] = garray[B->j[j]];
812     }
813   }
814   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
815 
816   /* store the column indices to the file */
817   if (!rank) {
818     MPI_Status status;
819     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
820     for (i=1; i<size; i++) {
821       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
822       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
823       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
824       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
825     }
826   } else {
827     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
828     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
829   }
830   ierr = PetscFree(column_indices);CHKERRQ(ierr);
831 
832   /* load up the local column values */
833   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
834   cnt  = 0;
835   for (i=0; i<mat->rmap.n; i++) {
836     for (j=B->i[i]; j<B->i[i+1]; j++) {
837       if ( garray[B->j[j]] > cstart) break;
838       column_values[cnt++] = B->a[j];
839     }
840     for (k=A->i[i]; k<A->i[i+1]; k++) {
841       column_values[cnt++] = A->a[k];
842     }
843     for (; j<B->i[i+1]; j++) {
844       column_values[cnt++] = B->a[j];
845     }
846   }
847   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
848 
849   /* store the column values to the file */
850   if (!rank) {
851     MPI_Status status;
852     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
853     for (i=1; i<size; i++) {
854       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
855       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
856       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
857       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
858     }
859   } else {
860     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
861     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
862   }
863   ierr = PetscFree(column_values);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
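/*
   For illustration, the on-disk layout produced above (a sketch; the byte-level
   encoding is handled by PetscBinaryWrite()):

     header[4]          MAT_FILE_COOKIE, M (global rows), N (global columns), total nonzeros
     row_lengths[M]     number of nonzeros in each global row
     column_indices[nz] global column indices, row by row, in increasing column order
     column_values[nz]  the matching nonzero values in the same order

   Process 0 writes its own portion first and then receives and writes each other
   process's row lengths, column indices, and values in turn.
*/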
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
869 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
870 {
871   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
872   PetscErrorCode    ierr;
873   PetscMPIInt       rank = aij->rank,size = aij->size;
874   PetscTruth        isdraw,iascii,isbinary;
875   PetscViewer       sviewer;
876   PetscViewerFormat format;
877 
878   PetscFunctionBegin;
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
881   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
882   if (iascii) {
883     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
884     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
885       MatInfo    info;
886       PetscTruth inodes;
887 
888       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
889       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
890       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
891       if (!inodes) {
892         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
893 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
894       } else {
895         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
896 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
897       }
898       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
900       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
901       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
902       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
903       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
904       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
905       PetscFunctionReturn(0);
906     } else if (format == PETSC_VIEWER_ASCII_INFO) {
907       PetscInt   inodecount,inodelimit,*inodes;
908       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
909       if (inodes) {
910         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
911       } else {
912         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
913       }
914       PetscFunctionReturn(0);
915     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
916       PetscFunctionReturn(0);
917     }
918   } else if (isbinary) {
919     if (size == 1) {
920       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
921       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
922     } else {
923       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
924     }
925     PetscFunctionReturn(0);
926   } else if (isdraw) {
927     PetscDraw  draw;
928     PetscTruth isnull;
929     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
930     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
931   }
932 
933   if (size == 1) {
934     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
935     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
936   } else {
937     /* assemble the entire matrix onto first processor. */
938     Mat         A;
939     Mat_SeqAIJ  *Aloc;
940     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
941     PetscScalar *a;
942 
943     if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");
944 
945     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
946     if (!rank) {
947       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
948     } else {
949       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
950     }
951     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
952     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
953     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
954     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
955 
956     /* copy over the A part */
957     Aloc = (Mat_SeqAIJ*)aij->A->data;
958     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
959     row = mat->rmap.rstart;
960     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
961     for (i=0; i<m; i++) {
962       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
963       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
964     }
965     aj = Aloc->j;
966     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
967 
968     /* copy over the B part */
969     Aloc = (Mat_SeqAIJ*)aij->B->data;
970     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
971     row  = mat->rmap.rstart;
972     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
973     ct   = cols;
974     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
975     for (i=0; i<m; i++) {
976       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
977       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
978     }
979     ierr = PetscFree(ct);CHKERRQ(ierr);
980     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
981     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
982     /*
983        Everyone has to participate in drawing the matrix since the graphics waits are
984        synchronized across all processors that share the PetscDraw object
985     */
986     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     if (!rank) {
988       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
989       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
990     }
991     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
992     ierr = MatDestroy(A);CHKERRQ(ierr);
993   }
994   PetscFunctionReturn(0);
995 }
996 
997 #undef __FUNCT__
998 #define __FUNCT__ "MatView_MPIAIJ"
999 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1000 {
1001   PetscErrorCode ierr;
1002   PetscTruth     iascii,isdraw,issocket,isbinary;
1003 
1004   PetscFunctionBegin;
1005   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1006   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1007   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1008   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1009   if (iascii || isdraw || isbinary || issocket) {
1010     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1011   } else {
1012     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1013   }
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatRelax_MPIAIJ"
1019 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1020 {
1021   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1022   PetscErrorCode ierr;
1023   Vec            bb1;
1024 
1025   PetscFunctionBegin;
1026   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1027 
1028   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1029     if (flag & SOR_ZERO_INITIAL_GUESS) {
1030       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1031       its--;
1032     }
1033 
1034     while (its--) {
1035       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037 
1038       /* update rhs: bb1 = bb - B*x */
1039       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1040       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1041 
1042       /* local sweep */
1043       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1060     }
1061   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1062     if (flag & SOR_ZERO_INITIAL_GUESS) {
1063       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1064       its--;
1065     }
1066     while (its--) {
1067       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1068       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1069 
1070       /* update rhs: bb1 = bb - B*x */
1071       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1072       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1073 
1074       /* local sweep */
1075       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1076     }
1077   } else {
1078     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1079   }
1080 
1081   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
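/*
   For illustration, the update applied above for the SOR_LOCAL_* flags written
   as a worked iteration, where x_ghost is the scattered copy of the off-process
   entries of x held in mat->lvec:

     bb1 = bb - B_offdiag * x_ghost
     x   = local SOR sweep(s) of A_diag with right-hand side bb1

   i.e. a block-Jacobi outer iteration of 'its' steps, each applying 'lits' SOR
   sweeps to the diagonal block only; a true parallel (global) SOR is not
   supported, as the final SETERRQ above indicates.
*/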
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatPermute_MPIAIJ"
1087 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1088 {
1089   MPI_Comm       comm,pcomm;
1090   PetscInt       first,local_size,nrows,*rows;
1091   int            ntids;
1092   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1093   PetscErrorCode ierr;
1094 
1095   PetscFunctionBegin;
1096   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1097   /* make a collective version of 'rowp' */
1098   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1099   if (pcomm==comm) {
1100     crowp = rowp;
1101   } else {
1102     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1103     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1104     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1105     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1106   }
1107   /* collect the global row permutation and invert it */
1108   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1109   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1110   if (pcomm!=comm) {
1111     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1112   }
1113   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1114   /* get the local target indices */
1115   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1116   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1117   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1118   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1119   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1121   /* the column permutation is so much easier;
1122      make a local version of 'colp' and invert it */
1123   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1124   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1125   if (ntids==1) {
1126     lcolp = colp;
1127   } else {
1128     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1129     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1130     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1131   }
1132   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1133   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1134   if (ntids>1) {
1135     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1136     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1137   }
1138   /* now we just get the submatrix */
1139   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1140   /* clean up */
1141   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1142   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1148 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1149 {
1150   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1151   Mat            A = mat->A,B = mat->B;
1152   PetscErrorCode ierr;
1153   PetscReal      isend[5],irecv[5];
1154 
1155   PetscFunctionBegin;
1156   info->block_size     = 1.0;
1157   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1158   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1159   isend[3] = info->memory;  isend[4] = info->mallocs;
1160   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1161   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1162   isend[3] += info->memory;  isend[4] += info->mallocs;
1163   if (flag == MAT_LOCAL) {
1164     info->nz_used      = isend[0];
1165     info->nz_allocated = isend[1];
1166     info->nz_unneeded  = isend[2];
1167     info->memory       = isend[3];
1168     info->mallocs      = isend[4];
1169   } else if (flag == MAT_GLOBAL_MAX) {
1170     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1171     info->nz_used      = irecv[0];
1172     info->nz_allocated = irecv[1];
1173     info->nz_unneeded  = irecv[2];
1174     info->memory       = irecv[3];
1175     info->mallocs      = irecv[4];
1176   } else if (flag == MAT_GLOBAL_SUM) {
1177     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1178     info->nz_used      = irecv[0];
1179     info->nz_allocated = irecv[1];
1180     info->nz_unneeded  = irecv[2];
1181     info->memory       = irecv[3];
1182     info->mallocs      = irecv[4];
1183   }
1184   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1185   info->fill_ratio_needed = 0;
1186   info->factor_mallocs    = 0;
1187   info->rows_global       = (double)matin->rmap.N;
1188   info->columns_global    = (double)matin->cmap.N;
1189   info->rows_local        = (double)matin->rmap.n;
1190   info->columns_local     = (double)matin->cmap.N;
1191 
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatSetOption_MPIAIJ"
1197 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1198 {
1199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   switch (op) {
1204   case MAT_NEW_NONZERO_LOCATIONS:
1205   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1206   case MAT_KEEP_ZEROED_ROWS:
1207   case MAT_NEW_NONZERO_LOCATION_ERR:
1208   case MAT_USE_INODES:
1209   case MAT_IGNORE_ZERO_ENTRIES:
1210     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1211     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1212     break;
1213   case MAT_ROW_ORIENTED:
1214     a->roworiented = flg;
1215     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1216     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1217     break;
1218   case MAT_NEW_DIAGONALS:
1219     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1220     break;
1221   case MAT_IGNORE_OFF_PROC_ENTRIES:
1222     a->donotstash = PETSC_TRUE;
1223     break;
1224   case MAT_SYMMETRIC:
1225     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1226     break;
1227   case MAT_STRUCTURALLY_SYMMETRIC:
1228   case MAT_HERMITIAN:
1229   case MAT_SYMMETRY_ETERNAL:
1230     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1231     break;
1232   default:
1233     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1234   }
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatGetRow_MPIAIJ"
1240 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1241 {
1242   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1243   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1244   PetscErrorCode ierr;
1245   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1246   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1247   PetscInt       *cmap,*idx_p;
1248 
1249   PetscFunctionBegin;
1250   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1251   mat->getrowactive = PETSC_TRUE;
1252 
1253   if (!mat->rowvalues && (idx || v)) {
1254     /*
1255         allocate enough space to hold information from the longest row.
1256     */
1257     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1258     PetscInt     max = 1,tmp;
1259     for (i=0; i<matin->rmap.n; i++) {
1260       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1261       if (max < tmp) { max = tmp; }
1262     }
1263     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1264     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1265   }
1266 
1267   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1268   lrow = row - rstart;
1269 
1270   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1271   if (!v)   {pvA = 0; pvB = 0;}
1272   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1273   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1274   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1275   nztot = nzA + nzB;
1276 
1277   cmap  = mat->garray;
1278   if (v  || idx) {
1279     if (nztot) {
1280       /* Sort by increasing column numbers, assuming A and B already sorted */
1281       PetscInt imark = -1;
1282       if (v) {
1283         *v = v_p = mat->rowvalues;
1284         for (i=0; i<nzB; i++) {
1285           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1286           else break;
1287         }
1288         imark = i;
1289         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1290         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1291       }
1292       if (idx) {
1293         *idx = idx_p = mat->rowindices;
1294         if (imark > -1) {
1295           for (i=0; i<imark; i++) {
1296             idx_p[i] = cmap[cworkB[i]];
1297           }
1298         } else {
1299           for (i=0; i<nzB; i++) {
1300             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1301             else break;
1302           }
1303           imark = i;
1304         }
1305         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1306         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1307       }
1308     } else {
1309       if (idx) *idx = 0;
1310       if (v)   *v   = 0;
1311     }
1312   }
1313   *nz = nztot;
1314   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1315   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
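/*
   For illustration, a caller-side sketch (row number hypothetical): the routine
   above merges the A and B parts of one locally owned row so the caller sees a
   single list sorted by increasing global column number.

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRow(mat,rstart,&ncols,&cols,&vals);CHKERRQ(ierr);
     ...
     ierr = MatRestoreRow(mat,rstart,&ncols,&cols,&vals);CHKERRQ(ierr);
*/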
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1321 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1322 {
1323   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1324 
1325   PetscFunctionBegin;
1326   if (!aij->getrowactive) {
1327     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1328   }
1329   aij->getrowactive = PETSC_FALSE;
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 #undef __FUNCT__
1334 #define __FUNCT__ "MatNorm_MPIAIJ"
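/*
   MatNorm_MPIAIJ - Computes a matrix norm from the two sequential blocks.

   NORM_FROBENIUS: sum |a_ij|^2 over the local A and B blocks, MPI_Allreduce the
                   partial sums, then ||A||_F = sqrt(sum_ij |a_ij|^2).
   NORM_1:         per-column sums of |a_ij| accumulated into a length-N array
                   (off-diagonal columns mapped through garray), allreduced with
                   MPI_SUM, then the maximum column sum is taken.
   NORM_INFINITY:  per-row sums of |a_ij| over both blocks, reduced with MPI_MAX.
   The two norm is not supported.
*/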
1335 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1336 {
1337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1338   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1339   PetscErrorCode ierr;
1340   PetscInt       i,j,cstart = mat->cmap.rstart;
1341   PetscReal      sum = 0.0;
1342   PetscScalar    *v;
1343 
1344   PetscFunctionBegin;
1345   if (aij->size == 1) {
1346     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1347   } else {
1348     if (type == NORM_FROBENIUS) {
1349       v = amat->a;
1350       for (i=0; i<amat->nz; i++) {
1351 #if defined(PETSC_USE_COMPLEX)
1352         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1353 #else
1354         sum += (*v)*(*v); v++;
1355 #endif
1356       }
1357       v = bmat->a;
1358       for (i=0; i<bmat->nz; i++) {
1359 #if defined(PETSC_USE_COMPLEX)
1360         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1361 #else
1362         sum += (*v)*(*v); v++;
1363 #endif
1364       }
1365       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1366       *norm = sqrt(*norm);
1367     } else if (type == NORM_1) { /* max column norm */
1368       PetscReal *tmp,*tmp2;
1369       PetscInt    *jj,*garray = aij->garray;
1370       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1371       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1372       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1373       *norm = 0.0;
1374       v = amat->a; jj = amat->j;
1375       for (j=0; j<amat->nz; j++) {
1376         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1377       }
1378       v = bmat->a; jj = bmat->j;
1379       for (j=0; j<bmat->nz; j++) {
1380         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1381       }
1382       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1383       for (j=0; j<mat->cmap.N; j++) {
1384         if (tmp2[j] > *norm) *norm = tmp2[j];
1385       }
1386       ierr = PetscFree(tmp);CHKERRQ(ierr);
1387       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1388     } else if (type == NORM_INFINITY) { /* max row norm */
1389       PetscReal ntemp = 0.0;
1390       for (j=0; j<aij->A->rmap.n; j++) {
1391         v = amat->a + amat->i[j];
1392         sum = 0.0;
1393         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1394           sum += PetscAbsScalar(*v); v++;
1395         }
1396         v = bmat->a + bmat->i[j];
1397         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1398           sum += PetscAbsScalar(*v); v++;
1399         }
1400         if (sum > ntemp) ntemp = sum;
1401       }
1402       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1403     } else {
1404       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1405     }
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatTranspose_MPIAIJ"
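/*
   MatTranspose_MPIAIJ - Forms the transpose by inserting each local row of the A and
   B blocks as a single column of the new matrix via MatSetValues(): the A block's
   column indices are temporarily shifted to global numbering (and shifted back
   afterwards), while the B block's columns are translated through garray.
   Preallocation uses d_nnz counted from the local A part; the same array doubles as
   the o_nnz estimate to avoid communication.  In-place transposition (no matout)
   replaces A through MatHeaderCopy() and therefore requires a square matrix.
*/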
1412 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1413 {
1414   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1415   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1416   PetscErrorCode ierr;
1417   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1418   PetscInt       cstart=A->cmap.rstart,ncol;
1419   Mat            B;
1420   PetscScalar    *array;
1421 
1422   PetscFunctionBegin;
1423   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1424 
1425   /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1426   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1427   ai = Aloc->i; aj = Aloc->j;
1428   bi = Bloc->i; bj = Bloc->j;
1429   ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1430   cols = d_nnz + na + 1; /* work space to be used by B part */
1431   ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1432   for (i=0; i<ai[ma]; i++){
1433     d_nnz[aj[i]] ++;
1434     aj[i] += cstart; /* global col index to be used by MatSetValues() */
1435   }
1436 
1437   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1438   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1439   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1440   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1441 
1442   /* copy over the A part */
1443   array = Aloc->a;
1444   row = A->rmap.rstart;
1445   for (i=0; i<ma; i++) {
1446     ncol = ai[i+1]-ai[i];
1447     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1448     row++; array += ncol; aj += ncol;
1449   }
1450   aj = Aloc->j;
1451   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col index */
1452 
1453   /* copy over the B part */
1454   array = Bloc->a;
1455   row = A->rmap.rstart;
1456   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1457   for (i=0; i<mb; i++) {
1458     ncol = bi[i+1]-bi[i];
1459     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1460     row++; array += ncol; cols += ncol;
1461   }
1462   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1463   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1464   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1465   if (matout) {
1466     *matout = B;
1467   } else {
1468     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1469   }
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
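/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll) * mat * diag(rr).
   The scatter of rr into the ghosted vector lvec is started first, so the
   communication overlaps with the left scaling of the off-diagonal block and the
   scaling of the diagonal block; only after the scatter completes is the
   off-diagonal block right scaled with the ghosted values.
*/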
1475 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1476 {
1477   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1478   Mat            a = aij->A,b = aij->B;
1479   PetscErrorCode ierr;
1480   PetscInt       s1,s2,s3;
1481 
1482   PetscFunctionBegin;
1483   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1484   if (rr) {
1485     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1486     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1487     /* Overlap communication with computation. */
1488     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1489   }
1490   if (ll) {
1491     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1492     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1493     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1494   }
1495   /* scale  the diagonal block */
1496   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1497 
1498   if (rr) {
1499     /* Do a scatter end and then right scale the off-diagonal block */
1500     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1501     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1502   }
1503 
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1509 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1510 {
1511   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1512   PetscErrorCode ierr;
1513 
1514   PetscFunctionBegin;
1515   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1516   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1521 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1522 {
1523   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1524   PetscErrorCode ierr;
1525 
1526   PetscFunctionBegin;
1527   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatEqual_MPIAIJ"
1533 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1534 {
1535   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1536   Mat            a,b,c,d;
1537   PetscTruth     flg;
1538   PetscErrorCode ierr;
1539 
1540   PetscFunctionBegin;
1541   a = matA->A; b = matA->B;
1542   c = matB->A; d = matB->B;
1543 
1544   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1545   if (flg) {
1546     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1547   }
1548   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1549   PetscFunctionReturn(0);
1550 }
1551 
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MatCopy_MPIAIJ"
1554 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1555 {
1556   PetscErrorCode ierr;
1557   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1558   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1559 
1560   PetscFunctionBegin;
1561   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1562   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1563     /* because of the column compression in the off-processor part of the matrix a->B,
1564        the number of columns in a->B and b->B may be different, hence we cannot call
1565        the MatCopy() directly on the two parts. If need be, we can provide a more
1566        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1567        then copying the submatrices */
1568     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1569   } else {
1570     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1571     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1572   }
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1578 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1579 {
1580   PetscErrorCode ierr;
1581 
1582   PetscFunctionBegin;
1583   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 #include "petscblaslapack.h"
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatAXPY_MPIAIJ"
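/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.
   SAME_NONZERO_PATTERN:   the value arrays of the A and B blocks are combined
                           directly with BLASaxpy_().
   SUBSET_NONZERO_PATTERN: the A blocks use MatAXPY_SeqAIJ(); for the B blocks an
                           index map xtoy from X's nonzeros into Y's nonzeros is
                           built (or reused) and the entries accumulated through it.
   Any other structure falls back to MatAXPY_Basic().
*/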
1590 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1591 {
1592   PetscErrorCode ierr;
1593   PetscInt       i;
1594   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1595   PetscBLASInt   bnz,one=1;
1596   Mat_SeqAIJ     *x,*y;
1597 
1598   PetscFunctionBegin;
1599   if (str == SAME_NONZERO_PATTERN) {
1600     PetscScalar alpha = a;
1601     x = (Mat_SeqAIJ *)xx->A->data;
1602     y = (Mat_SeqAIJ *)yy->A->data;
1603     bnz = PetscBLASIntCast(x->nz);
1604     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1605     x = (Mat_SeqAIJ *)xx->B->data;
1606     y = (Mat_SeqAIJ *)yy->B->data;
1607     bnz = PetscBLASIntCast(x->nz);
1608     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1609   } else if (str == SUBSET_NONZERO_PATTERN) {
1610     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1611 
1612     x = (Mat_SeqAIJ *)xx->B->data;
1613     y = (Mat_SeqAIJ *)yy->B->data;
1614     if (y->xtoy && y->XtoY != xx->B) {
1615       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1616       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1617     }
1618     if (!y->xtoy) { /* get xtoy */
1619       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1620       y->XtoY = xx->B;
1621       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1622     }
1623     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1624   } else {
1625     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1626   }
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatConjugate_MPIAIJ"
1634 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1635 {
1636 #if defined(PETSC_USE_COMPLEX)
1637   PetscErrorCode ierr;
1638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1639 
1640   PetscFunctionBegin;
1641   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1642   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1643 #else
1644   PetscFunctionBegin;
1645 #endif
1646   PetscFunctionReturn(0);
1647 }
1648 
1649 #undef __FUNCT__
1650 #define __FUNCT__ "MatRealPart_MPIAIJ"
1651 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1652 {
1653   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1654   PetscErrorCode ierr;
1655 
1656   PetscFunctionBegin;
1657   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1658   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1659   PetscFunctionReturn(0);
1660 }
1661 
1662 #undef __FUNCT__
1663 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1664 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1665 {
1666   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1667   PetscErrorCode ierr;
1668 
1669   PetscFunctionBegin;
1670   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1671   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #ifdef PETSC_HAVE_PBGL
1676 
1677 #include <boost/parallel/mpi/bsp_process_group.hpp>
1678 #include <boost/graph/distributed/ilu_default_graph.hpp>
1679 #include <boost/graph/distributed/ilu_0_block.hpp>
1680 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1681 #include <boost/graph/distributed/petsc/interface.hpp>
1682 #include <boost/multi_array.hpp>
1683 #include <boost/parallel/distributed_property_map.hpp>
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1687 /*
1688   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1689 */
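/*
   Only levels = 0 with identity row and column permutations is supported.  The matrix
   is read into a distributed Boost graph, ilu_permuted() performs the factorization,
   and the factored level graph is stashed in a PetscContainer composed on the
   (structurally empty) factor matrix under the key "graph" for MatSolve_MPIAIJ().
*/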
1690 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1691 {
1692   namespace petsc = boost::distributed::petsc;
1693 
1694   namespace graph_dist = boost::graph::distributed;
1695   using boost::graph::distributed::ilu_default::process_group_type;
1696   using boost::graph::ilu_permuted;
1697 
1698   PetscTruth      row_identity, col_identity;
1699   PetscContainer  c;
1700   PetscInt        m, n, M, N;
1701   PetscErrorCode  ierr;
1702 
1703   PetscFunctionBegin;
1704   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1705   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1706   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1707   if (!row_identity || !col_identity) {
1708     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1709   }
1710 
1711   process_group_type pg;
1712   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1713   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1714   lgraph_type&   level_graph = *lgraph_p;
1715   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1716 
1717   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1718   ilu_permuted(level_graph);
1719 
1720   /* put together the new matrix */
1721   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1722   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1723   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1724   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1725   ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1726   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1727   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1728   (*fact)->factor = FACTOR_LU;
1729 
1730   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1731   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1732   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1738 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1739 {
1740   PetscFunctionBegin;
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatSolve_MPIAIJ"
1746 /*
1747   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1748 */
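/*
   Retrieves the factored level graph from the "graph" container composed on A, wraps
   the arrays of b and x as Boost property maps over the graph vertices, and applies
   ilu_set_solve() to compute x.
*/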
1749 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1750 {
1751   namespace graph_dist = boost::graph::distributed;
1752 
1753   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1754   lgraph_type*   lgraph_p;
1755   PetscContainer c;
1756   PetscErrorCode ierr;
1757 
1758   PetscFunctionBegin;
1759   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1760   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1761   ierr = VecCopy(b, x); CHKERRQ(ierr);
1762 
1763   PetscScalar* array_x;
1764   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1765   PetscInt sx;
1766   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1767 
1768   PetscScalar* array_b;
1769   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1770   PetscInt sb;
1771   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1772 
1773   lgraph_type&   level_graph = *lgraph_p;
1774   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1775 
1776   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1777   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1778                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1779 
1780   typedef boost::iterator_property_map<array_ref_type::iterator,
1781                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1782   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1783                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1784 
1785   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1786 
1787   PetscFunctionReturn(0);
1788 }
1789 #endif
1790 
1791 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1792   PetscInt       nzlocal,nsends,nrecvs;
1793   PetscMPIInt    *send_rank;
1794   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1795   PetscScalar    *sbuf_a,**rbuf_a;
1796   PetscErrorCode (*MatDestroy)(Mat);
1797 } Mat_Redundant;
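/* The cached ranks, row-pointer/column buffers and value buffers let a
   MAT_REUSE_MATRIX call skip the structural exchange and resend only the
   numerical values. */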
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1801 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1802 {
1803   PetscErrorCode       ierr;
1804   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1805   PetscInt             i;
1806 
1807   PetscFunctionBegin;
1808   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1809   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1810   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1811   for (i=0; i<redund->nrecvs; i++){
1812     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1813     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1814   }
1815   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1816   ierr = PetscFree(redund);CHKERRQ(ierr);
1817   PetscFunctionReturn(0);
1818 }
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatDestroy_MatRedundant"
1822 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1823 {
1824   PetscErrorCode  ierr;
1825   PetscContainer  container;
1826   Mat_Redundant   *redund=PETSC_NULL;
1827 
1828   PetscFunctionBegin;
1829   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1830   if (container) {
1831     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1832   } else {
1833     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1834   }
1835   A->ops->destroy = redund->MatDestroy;
1836   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1837   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1838   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
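/*
   MatGetRedundantMatrix_MPIAIJ - Builds, on each sub-communicator, a copy of the
   entire parallel matrix.  Outline:
     1. Determine which ranks this process sends its rows to (send_rank) and receives
        from (recv_rank), including the leftover processes when size is not divisible
        by nsubcomm.
     2. Pack the local rows into sbuf_j (row pointers followed by global column
        indices) and sbuf_a (values), each row in increasing global column order.
     3. MAT_INITIAL_MATRIX only: exchange nonzero counts, then the row-pointer/column
        buffers, and create C preallocated for the longest row encountered.
     4. Exchange the values and insert both the local and the received rows into C
        with MatSetValues().
     5. MAT_INITIAL_MATRIX only: attach a Mat_Redundant container to C so the pattern
        and buffers can be reused by later calls.

   A hedged usage sketch through MatGetRedundantMatrix(), assuming it takes the same
   arguments as this routine:

     Mat C;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     // ... later, after mat's values change but its nonzero pattern does not ...
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal_sub,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/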
1844 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1845 {
1846   PetscMPIInt    rank,size;
1847   MPI_Comm       comm=((PetscObject)mat)->comm;
1848   PetscErrorCode ierr;
1849   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1850   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1851   PetscInt       *rowrange=mat->rmap.range;
1852   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1853   Mat            A=aij->A,B=aij->B,C=*matredundant;
1854   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1855   PetscScalar    *sbuf_a;
1856   PetscInt       nzlocal=a->nz+b->nz;
1857   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1858   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1859   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1860   PetscScalar    *vals,*aworkA,*aworkB;
1861   PetscMPIInt    tag1,tag2,tag3,imdex;
1862   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1863                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1864   MPI_Status     recv_status,*send_status;
1865   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1866   PetscInt       **rbuf_j=PETSC_NULL;
1867   PetscScalar    **rbuf_a=PETSC_NULL;
1868   Mat_Redundant  *redund=PETSC_NULL;
1869   PetscContainer container;
1870 
1871   PetscFunctionBegin;
1872   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1873   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1874 
1875   if (reuse == MAT_REUSE_MATRIX) {
1876     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1877     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1878     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1879     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1880     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1881     if (container) {
1882       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1883     } else {
1884       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1885     }
1886     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1887 
1888     nsends    = redund->nsends;
1889     nrecvs    = redund->nrecvs;
1890     send_rank = redund->send_rank; recv_rank = send_rank + size;
1891     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1892     sbuf_j    = redund->sbuf_j;
1893     sbuf_a    = redund->sbuf_a;
1894     rbuf_j    = redund->rbuf_j;
1895     rbuf_a    = redund->rbuf_a;
1896   }
1897 
1898   if (reuse == MAT_INITIAL_MATRIX){
1899     PetscMPIInt  subrank,subsize;
1900     PetscInt     nleftover,np_subcomm;
1901     /* get the destination processors' id send_rank, nsends and nrecvs */
1902     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1903     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1904     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
1905     recv_rank = send_rank + size;
1906     np_subcomm = size/nsubcomm;
1907     nleftover  = size - nsubcomm*np_subcomm;
1908     nsends = 0; nrecvs = 0;
1909     for (i=0; i<size; i++){ /* i=rank*/
1910       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1911         send_rank[nsends] = i; nsends++;
1912         recv_rank[nrecvs++] = i;
1913       }
1914     }
1915     if (rank >= size - nleftover){/* this proc is a leftover processor */
1916       i = size-nleftover-1;
1917       j = 0;
1918       while (j < nsubcomm - nleftover){
1919         send_rank[nsends++] = i;
1920         i--; j++;
1921       }
1922     }
1923 
1924     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1925       for (i=0; i<nleftover; i++){
1926         recv_rank[nrecvs++] = size-nleftover+i;
1927       }
1928     }
1929 
1930     /* allocate sbuf_j, sbuf_a */
1931     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1932     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1933     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1934   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1935 
1936   /* copy mat's local entries into the buffers */
1937   if (reuse == MAT_INITIAL_MATRIX){
1938     rownz_max = 0;
1939     rptr = sbuf_j;
1940     cols = sbuf_j + rend-rstart + 1;
1941     vals = sbuf_a;
1942     rptr[0] = 0;
1943     for (i=0; i<rend-rstart; i++){
1944       row = i + rstart;
1945       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1946       ncols  = nzA + nzB;
1947       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1948       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1949       /* load the column indices for this row into cols */
1950       lwrite = 0;
1951       for (l=0; l<nzB; l++) {
1952         if ((ctmp = bmap[cworkB[l]]) < cstart){
1953           vals[lwrite]   = aworkB[l];
1954           cols[lwrite++] = ctmp;
1955         }
1956       }
1957       for (l=0; l<nzA; l++){
1958         vals[lwrite]   = aworkA[l];
1959         cols[lwrite++] = cstart + cworkA[l];
1960       }
1961       for (l=0; l<nzB; l++) {
1962         if ((ctmp = bmap[cworkB[l]]) >= cend){
1963           vals[lwrite]   = aworkB[l];
1964           cols[lwrite++] = ctmp;
1965         }
1966       }
1967       vals += ncols;
1968       cols += ncols;
1969       rptr[i+1] = rptr[i] + ncols;
1970       if (rownz_max < ncols) rownz_max = ncols;
1971     }
1972     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
1973   } else { /* only copy matrix values into sbuf_a */
1974     rptr = sbuf_j;
1975     vals = sbuf_a;
1976     rptr[0] = 0;
1977     for (i=0; i<rend-rstart; i++){
1978       row = i + rstart;
1979       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1980       ncols  = nzA + nzB;
1981       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1982       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1983       lwrite = 0;
1984       for (l=0; l<nzB; l++) {
1985         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
1986       }
1987       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
1988       for (l=0; l<nzB; l++) {
1989         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
1990       }
1991       vals += ncols;
1992       rptr[i+1] = rptr[i] + ncols;
1993     }
1994   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1995 
1996   /* send nzlocal to others, and recv others' nzlocal */
1997   /*--------------------------------------------------*/
1998   if (reuse == MAT_INITIAL_MATRIX){
1999     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2000     s_waits2 = s_waits3 + nsends;
2001     s_waits1 = s_waits2 + nsends;
2002     r_waits1 = s_waits1 + nsends;
2003     r_waits2 = r_waits1 + nrecvs;
2004     r_waits3 = r_waits2 + nrecvs;
2005   } else {
2006     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2007     r_waits3 = s_waits3 + nsends;
2008   }
2009 
2010   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2011   if (reuse == MAT_INITIAL_MATRIX){
2012     /* get new tags to keep the communication clean */
2013     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2014     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2015     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2016     rbuf_nz = sbuf_nz + nsends;
2017 
2018     /* post receives of other's nzlocal */
2019     for (i=0; i<nrecvs; i++){
2020       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2021     }
2022     /* send nzlocal to others */
2023     for (i=0; i<nsends; i++){
2024       sbuf_nz[i] = nzlocal;
2025       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2026     }
2027     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2028     count = nrecvs;
2029     while (count) {
2030       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2031       recv_rank[imdex] = recv_status.MPI_SOURCE;
2032       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2033       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2034 
2035       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2036       rbuf_nz[imdex] += i + 2;
2037       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2038       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2039       count--;
2040     }
2041     /* wait on sends of nzlocal */
2042     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2043     /* send mat->i,j to others, and recv from others */
2044     /*------------------------------------------------*/
2045     for (i=0; i<nsends; i++){
2046       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2047       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2048     }
2049     /* wait on receives of mat->i,j */
2050     /*------------------------------*/
2051     count = nrecvs;
2052     while (count) {
2053       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2054       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2055       count--;
2056     }
2057     /* wait on sends of mat->i,j */
2058     /*---------------------------*/
2059     if (nsends) {
2060       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2061     }
2062   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2063 
2064   /* post receives, send and receive mat->a */
2065   /*----------------------------------------*/
2066   for (imdex=0; imdex<nrecvs; imdex++) {
2067     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2068   }
2069   for (i=0; i<nsends; i++){
2070     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2071   }
2072   count = nrecvs;
2073   while (count) {
2074     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2075     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2076     count--;
2077   }
2078   if (nsends) {
2079     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2080   }
2081 
2082   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2083 
2084   /* create redundant matrix */
2085   /*-------------------------*/
2086   if (reuse == MAT_INITIAL_MATRIX){
2087     /* compute rownz_max for preallocation */
2088     for (imdex=0; imdex<nrecvs; imdex++){
2089       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2090       rptr = rbuf_j[imdex];
2091       for (i=0; i<j; i++){
2092         ncols = rptr[i+1] - rptr[i];
2093         if (rownz_max < ncols) rownz_max = ncols;
2094       }
2095     }
2096 
2097     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2098     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2099     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2100     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2101     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2102   } else {
2103     C = *matredundant;
2104   }
2105 
2106   /* insert local matrix entries */
2107   rptr = sbuf_j;
2108   cols = sbuf_j + rend-rstart + 1;
2109   vals = sbuf_a;
2110   for (i=0; i<rend-rstart; i++){
2111     row   = i + rstart;
2112     ncols = rptr[i+1] - rptr[i];
2113     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2114     vals += ncols;
2115     cols += ncols;
2116   }
2117   /* insert received matrix entries */
2118   for (imdex=0; imdex<nrecvs; imdex++){
2119     rstart = rowrange[recv_rank[imdex]];
2120     rend   = rowrange[recv_rank[imdex]+1];
2121     rptr = rbuf_j[imdex];
2122     cols = rbuf_j[imdex] + rend-rstart + 1;
2123     vals = rbuf_a[imdex];
2124     for (i=0; i<rend-rstart; i++){
2125       row   = i + rstart;
2126       ncols = rptr[i+1] - rptr[i];
2127       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2128       vals += ncols;
2129       cols += ncols;
2130     }
2131   }
2132   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2133   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2134   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2135   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2136   if (reuse == MAT_INITIAL_MATRIX){
2137     PetscContainer container;
2138     *matredundant = C;
2139     /* create a supporting struct and attach it to C for reuse */
2140     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2141     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2142     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2143     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2144     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2145 
2146     redund->nzlocal = nzlocal;
2147     redund->nsends  = nsends;
2148     redund->nrecvs  = nrecvs;
2149     redund->send_rank = send_rank;
2150     redund->sbuf_nz = sbuf_nz;
2151     redund->sbuf_j  = sbuf_j;
2152     redund->sbuf_a  = sbuf_a;
2153     redund->rbuf_j  = rbuf_j;
2154     redund->rbuf_a  = rbuf_a;
2155 
2156     redund->MatDestroy = C->ops->destroy;
2157     C->ops->destroy    = MatDestroy_MatRedundant;
2158   }
2159   PetscFunctionReturn(0);
2160 }
2161 
2162 #undef __FUNCT__
2163 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
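/*
   MatGetRowMin_MPIAIJ - For each local row, takes the row minimum of the diagonal
   block and of the off-diagonal block separately, keeps whichever candidate has the
   smaller absolute value, and translates the winning column index to global
   numbering (cstart offset for the diagonal block, garray for the off-diagonal one).
*/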
2164 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2165 {
2166   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2167   PetscInt       n      = A->rmap.n;
2168   PetscInt       cstart = A->cmap.rstart;
2169   PetscInt      *cmap   = mat->garray;
2170   PetscInt      *diagIdx, *offdiagIdx;
2171   Vec            diagV, offdiagV;
2172   PetscScalar   *a, *diagA, *offdiagA;
2173   PetscInt       r;
2174   PetscErrorCode ierr;
2175 
2176   PetscFunctionBegin;
2177   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2178   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2179   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2180   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2181   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2182   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2183   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2184   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2185   for(r = 0; r < n; ++r) {
2186     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2187       a[r]   = diagA[r];
2188       idx[r] = cstart + diagIdx[r];
2189     } else {
2190       a[r]   = offdiagA[r];
2191       idx[r] = cmap[offdiagIdx[r]];
2192     }
2193   }
2194   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2195   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2196   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2197   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2198   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2199   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 #undef __FUNCT__
2204 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2205 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2206 {
2207   PetscErrorCode ierr;
2208 
2209   PetscFunctionBegin;
2210   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2211   PetscFunctionReturn(0);
2212 }
2213 
2214 /* -------------------------------------------------------------------*/
2215 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2216        MatGetRow_MPIAIJ,
2217        MatRestoreRow_MPIAIJ,
2218        MatMult_MPIAIJ,
2219 /* 4*/ MatMultAdd_MPIAIJ,
2220        MatMultTranspose_MPIAIJ,
2221        MatMultTransposeAdd_MPIAIJ,
2222 #ifdef PETSC_HAVE_PBGL
2223        MatSolve_MPIAIJ,
2224 #else
2225        0,
2226 #endif
2227        0,
2228        0,
2229 /*10*/ 0,
2230        0,
2231        0,
2232        MatRelax_MPIAIJ,
2233        MatTranspose_MPIAIJ,
2234 /*15*/ MatGetInfo_MPIAIJ,
2235        MatEqual_MPIAIJ,
2236        MatGetDiagonal_MPIAIJ,
2237        MatDiagonalScale_MPIAIJ,
2238        MatNorm_MPIAIJ,
2239 /*20*/ MatAssemblyBegin_MPIAIJ,
2240        MatAssemblyEnd_MPIAIJ,
2241        0,
2242        MatSetOption_MPIAIJ,
2243        MatZeroEntries_MPIAIJ,
2244 /*25*/ MatZeroRows_MPIAIJ,
2245        0,
2246 #ifdef PETSC_HAVE_PBGL
2247        MatLUFactorNumeric_MPIAIJ,
2248 #else
2249        0,
2250 #endif
2251        0,
2252        0,
2253 /*30*/ MatSetUpPreallocation_MPIAIJ,
2254 #ifdef PETSC_HAVE_PBGL
2255        MatILUFactorSymbolic_MPIAIJ,
2256 #else
2257        0,
2258 #endif
2259        0,
2260        0,
2261        0,
2262 /*35*/ MatDuplicate_MPIAIJ,
2263        0,
2264        0,
2265        0,
2266        0,
2267 /*40*/ MatAXPY_MPIAIJ,
2268        MatGetSubMatrices_MPIAIJ,
2269        MatIncreaseOverlap_MPIAIJ,
2270        MatGetValues_MPIAIJ,
2271        MatCopy_MPIAIJ,
2272 /*45*/ 0,
2273        MatScale_MPIAIJ,
2274        0,
2275        0,
2276        0,
2277 /*50*/ MatSetBlockSize_MPIAIJ,
2278        0,
2279        0,
2280        0,
2281        0,
2282 /*55*/ MatFDColoringCreate_MPIAIJ,
2283        0,
2284        MatSetUnfactored_MPIAIJ,
2285        MatPermute_MPIAIJ,
2286        0,
2287 /*60*/ MatGetSubMatrix_MPIAIJ,
2288        MatDestroy_MPIAIJ,
2289        MatView_MPIAIJ,
2290        0,
2291        0,
2292 /*65*/ 0,
2293        0,
2294        0,
2295        0,
2296        0,
2297 /*70*/ 0,
2298        0,
2299        MatSetColoring_MPIAIJ,
2300 #if defined(PETSC_HAVE_ADIC)
2301        MatSetValuesAdic_MPIAIJ,
2302 #else
2303        0,
2304 #endif
2305        MatSetValuesAdifor_MPIAIJ,
2306 /*75*/ 0,
2307        0,
2308        0,
2309        0,
2310        0,
2311 /*80*/ 0,
2312        0,
2313        0,
2314        0,
2315 /*84*/ MatLoad_MPIAIJ,
2316        0,
2317        0,
2318        0,
2319        0,
2320        0,
2321 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2322        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2323        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2324        MatPtAP_Basic,
2325        MatPtAPSymbolic_MPIAIJ,
2326 /*95*/ MatPtAPNumeric_MPIAIJ,
2327        0,
2328        0,
2329        0,
2330        0,
2331 /*100*/0,
2332        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2333        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2334        MatConjugate_MPIAIJ,
2335        0,
2336 /*105*/MatSetValuesRow_MPIAIJ,
2337        MatRealPart_MPIAIJ,
2338        MatImaginaryPart_MPIAIJ,
2339        0,
2340        0,
2341 /*110*/0,
2342        MatGetRedundantMatrix_MPIAIJ,
2343        MatGetRowMin_MPIAIJ,
2344        0,
2345        0,
2346 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2347 
2348 /* ----------------------------------------------------------------------------------------*/
2349 
2350 EXTERN_C_BEGIN
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2353 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2354 {
2355   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2356   PetscErrorCode ierr;
2357 
2358   PetscFunctionBegin;
2359   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2360   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2361   PetscFunctionReturn(0);
2362 }
2363 EXTERN_C_END
2364 
2365 EXTERN_C_BEGIN
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2368 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2369 {
2370   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2371   PetscErrorCode ierr;
2372 
2373   PetscFunctionBegin;
2374   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2375   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 EXTERN_C_END
2379 
2380 #include "petscpc.h"
2381 EXTERN_C_BEGIN
2382 #undef __FUNCT__
2383 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
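/*
   MatMPIAIJSetPreallocation_MPIAIJ - Creates and preallocates the two sequential AIJ
   blocks: b->A is the square diagonal block (local rows x local columns) and b->B is
   the off-diagonal block, created with the full global column width (it is compressed
   to the ghost columns during assembly).  d_nz/d_nnz preallocate b->A and o_nz/o_nnz
   preallocate b->B; when left at PETSC_DEFAULT the defaults are 5 and 2 nonzeros per
   row respectively.

   A minimal usage sketch through the public interface (hedged; sizes illustrative):

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(B,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/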
2384 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2385 {
2386   Mat_MPIAIJ     *b;
2387   PetscErrorCode ierr;
2388   PetscInt       i;
2389 
2390   PetscFunctionBegin;
2391   B->preallocated = PETSC_TRUE;
2392   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2393   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2394   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2395   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2396 
2397   B->rmap.bs = B->cmap.bs = 1;
2398   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2399   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2400   if (d_nnz) {
2401     for (i=0; i<B->rmap.n; i++) {
2402       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2403     }
2404   }
2405   if (o_nnz) {
2406     for (i=0; i<B->rmap.n; i++) {
2407       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2408     }
2409   }
2410   b = (Mat_MPIAIJ*)B->data;
2411 
2412   /* Explicitly create 2 MATSEQAIJ matrices. */
2413   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2414   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2415   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2416   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2417   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2418   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2419   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2420   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2421 
2422   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2423   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2424 
2425   PetscFunctionReturn(0);
2426 }
2427 EXTERN_C_END
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatDuplicate_MPIAIJ"
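/*
   MatDuplicate_MPIAIJ - Creates a new parallel AIJ matrix with the same layout as
   matin: copies the row/column maps, colmap and garray, duplicates the ghost vector
   lvec and its scatter context Mvctx, and duplicates the A and B blocks (copying the
   numerical values only when cpvalues requests it).
*/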
2431 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2432 {
2433   Mat            mat;
2434   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2435   PetscErrorCode ierr;
2436 
2437   PetscFunctionBegin;
2438   *newmat       = 0;
2439   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2440   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2441   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2442   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2443   a    = (Mat_MPIAIJ*)mat->data;
2444 
2445   mat->factor       = matin->factor;
2446   mat->rmap.bs      = matin->rmap.bs;
2447   mat->assembled    = PETSC_TRUE;
2448   mat->insertmode   = NOT_SET_VALUES;
2449   mat->preallocated = PETSC_TRUE;
2450 
2451   a->size           = oldmat->size;
2452   a->rank           = oldmat->rank;
2453   a->donotstash     = oldmat->donotstash;
2454   a->roworiented    = oldmat->roworiented;
2455   a->rowindices     = 0;
2456   a->rowvalues      = 0;
2457   a->getrowactive   = PETSC_FALSE;
2458 
2459   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2460   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2461 
2462   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2463   if (oldmat->colmap) {
2464 #if defined (PETSC_USE_CTABLE)
2465     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2466 #else
2467     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2468     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2469     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2470 #endif
2471   } else a->colmap = 0;
2472   if (oldmat->garray) {
2473     PetscInt len;
2474     len  = oldmat->B->cmap.n;
2475     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2476     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2477     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2478   } else a->garray = 0;
2479 
2480   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2481   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2482   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2483   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2484   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2485   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2486   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2487   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2488   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2489   *newmat = mat;
2490   PetscFunctionReturn(0);
2491 }
2492 
2493 #include "petscsys.h"
2494 
2495 #undef __FUNCT__
2496 #define __FUNCT__ "MatLoad_MPIAIJ"
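/*
   MatLoad_MPIAIJ - Loads a matrix from a PETSc binary viewer.  Process 0 reads the
   header and all row lengths, computes how many nonzeros every process owns, and
   ships each process its row lengths, column indices and values with point-to-point
   messages; every process then counts its diagonal and off-diagonal entries per row
   for preallocation and inserts its rows with MatSetValues_MPIAIJ().
*/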
2497 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2498 {
2499   Mat            A;
2500   PetscScalar    *vals,*svals;
2501   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2502   MPI_Status     status;
2503   PetscErrorCode ierr;
2504   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2505   PetscInt       i,nz,j,rstart,rend,mmax;
2506   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2507   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2508   PetscInt       cend,cstart,n,*rowners;
2509   int            fd;
2510 
2511   PetscFunctionBegin;
2512   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2513   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2514   if (!rank) {
2515     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2516     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2517     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2518   }
2519 
2520   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2521   M = header[1]; N = header[2];
2522   /* determine ownership of all rows */
2523   m    = M/size + ((M % size) > rank);
2524   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2525   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2526 
2527   /* First process needs enough room for process with most rows */
2528   if (!rank) {
2529     mmax       = rowners[1];
2530     for (i=2; i<size; i++) {
2531       mmax = PetscMax(mmax,rowners[i]);
2532     }
2533   } else mmax = m;
2534 
2535   rowners[0] = 0;
2536   for (i=2; i<=size; i++) {
2537     rowners[i] += rowners[i-1];
2538   }
2539   rstart = rowners[rank];
2540   rend   = rowners[rank+1];
2541 
2542   /* distribute row lengths to all processors */
2543   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2544   if (!rank) {
2545     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2546     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2547     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2548     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2549     for (j=0; j<m; j++) {
2550       procsnz[0] += ourlens[j];
2551     }
2552     for (i=1; i<size; i++) {
2553       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2554       /* calculate the number of nonzeros on each processor */
2555       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2556         procsnz[i] += rowlengths[j];
2557       }
2558       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2559     }
2560     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2561   } else {
2562     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2563   }
2564 
2565   if (!rank) {
2566     /* determine max buffer needed and allocate it */
2567     maxnz = 0;
2568     for (i=0; i<size; i++) {
2569       maxnz = PetscMax(maxnz,procsnz[i]);
2570     }
2571     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2572 
2573     /* read in my part of the matrix column indices  */
2574     nz   = procsnz[0];
2575     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2576     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2577 
2578     /* read in everyone else's and ship off */
2579     for (i=1; i<size; i++) {
2580       nz   = procsnz[i];
2581       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2582       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2583     }
2584     ierr = PetscFree(cols);CHKERRQ(ierr);
2585   } else {
2586     /* determine buffer space needed for message */
2587     nz = 0;
2588     for (i=0; i<m; i++) {
2589       nz += ourlens[i];
2590     }
2591     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2592 
2593     /* receive message of column indices*/
2594     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2595     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2596     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2597   }
2598 
2599   /* determine column ownership if matrix is not square */
2600   if (N != M) {
2601     n      = N/size + ((N % size) > rank);
2602     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2603     cstart = cend - n;
2604   } else {
2605     cstart = rstart;
2606     cend   = rend;
2607     n      = cend - cstart;
2608   }
2609 
2610   /* loop over local rows, determining number of off diagonal entries */
2611   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2612   jj = 0;
2613   for (i=0; i<m; i++) {
2614     for (j=0; j<ourlens[i]; j++) {
2615       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2616       jj++;
2617     }
2618   }
2619 
2620   /* create our matrix */
2621   for (i=0; i<m; i++) {
2622     ourlens[i] -= offlens[i];
2623   }
2624   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2625   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2626   ierr = MatSetType(A,type);CHKERRQ(ierr);
2627   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2628 
2629   for (i=0; i<m; i++) {
2630     ourlens[i] += offlens[i];
2631   }
2632 
2633   if (!rank) {
2634     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2635 
2636     /* read in my part of the matrix numerical values  */
2637     nz   = procsnz[0];
2638     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2639 
2640     /* insert into matrix */
2641     jj      = rstart;
2642     smycols = mycols;
2643     svals   = vals;
2644     for (i=0; i<m; i++) {
2645       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2646       smycols += ourlens[i];
2647       svals   += ourlens[i];
2648       jj++;
2649     }
2650 
2651     /* read in other processors and ship out */
2652     for (i=1; i<size; i++) {
2653       nz   = procsnz[i];
2654       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2655       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2656     }
2657     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2658   } else {
2659     /* receive numeric values */
2660     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2661 
2662     /* receive message of values*/
2663     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2664     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2665     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2666 
2667     /* insert into matrix */
2668     jj      = rstart;
2669     smycols = mycols;
2670     svals   = vals;
2671     for (i=0; i<m; i++) {
2672       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2673       smycols += ourlens[i];
2674       svals   += ourlens[i];
2675       jj++;
2676     }
2677   }
2678   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2679   ierr = PetscFree(vals);CHKERRQ(ierr);
2680   ierr = PetscFree(mycols);CHKERRQ(ierr);
2681   ierr = PetscFree(rowners);CHKERRQ(ierr);
2682 
2683   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2684   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2685   *newmat = A;
2686   PetscFunctionReturn(0);
2687 }
2688 
2689 #undef __FUNCT__
2690 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2691 /*
2692     Not great since it makes two copies of the submatrix: first each process
2693   extracts a local SeqAIJ piece, and then the local pieces are concatenated into
2694   the final parallel matrix.  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2695 */
2696 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2697 {
2698   PetscErrorCode ierr;
2699   PetscMPIInt    rank,size;
2700   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2701   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2702   Mat            *local,M,Mreuse;
2703   PetscScalar    *vwork,*aa;
2704   MPI_Comm       comm = ((PetscObject)mat)->comm;
2705   Mat_SeqAIJ     *aij;
2706 
2707 
2708   PetscFunctionBegin;
2709   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2710   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2711 
2712   if (call ==  MAT_REUSE_MATRIX) {
2713     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2714     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2715     local = &Mreuse;
2716     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2717   } else {
2718     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2719     Mreuse = *local;
2720     ierr   = PetscFree(local);CHKERRQ(ierr);
2721   }
2722 
2723   /*
2724       m - number of local rows
2725       n - number of columns (same on all processors)
2726       rstart - first row in new global matrix generated
2727   */
2728   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2729   if (call == MAT_INITIAL_MATRIX) {
2730     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2731     ii  = aij->i;
2732     jj  = aij->j;
2733 
2734     /*
2735         Determine the number of non-zeros in the diagonal and off-diagonal
2736         portions of the matrix in order to do correct preallocation
2737     */
2738 
2739     /* first get start and end of "diagonal" columns */
2740     if (csize == PETSC_DECIDE) {
2741       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2742       if (mglobal == n) { /* square matrix */
2743         nlocal = m;
2744       } else {
2745         nlocal = n/size + ((n % size) > rank);
2746       }
2747     } else {
2748       nlocal = csize;
2749     }
2750     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2751     rstart = rend - nlocal;
2752     if (rank == size - 1 && rend != n) {
2753       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2754     }
2755 
2756     /* next, compute all the lengths */
2757     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2758     olens = dlens + m;
2759     for (i=0; i<m; i++) {
2760       jend = ii[i+1] - ii[i];
2761       olen = 0;
2762       dlen = 0;
2763       for (j=0; j<jend; j++) {
2764         if (*jj < rstart || *jj >= rend) olen++;
2765         else dlen++;
2766         jj++;
2767       }
2768       olens[i] = olen;
2769       dlens[i] = dlen;
2770     }
2771     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2772     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2773     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2774     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2775     ierr = PetscFree(dlens);CHKERRQ(ierr);
2776   } else {
2777     PetscInt ml,nl;
2778 
2779     M = *newmat;
2780     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2781     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2782     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2783     /*
2784          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2785        rather than the slower MatSetValues().
2786     */
2787     M->was_assembled = PETSC_TRUE;
2788     M->assembled     = PETSC_FALSE;
2789   }
2790   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2791   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2792   ii  = aij->i;
2793   jj  = aij->j;
2794   aa  = aij->a;
2795   for (i=0; i<m; i++) {
2796     row   = rstart + i;
2797     nz    = ii[i+1] - ii[i];
2798     cwork = jj;     jj += nz;
2799     vwork = aa;     aa += nz;
2800     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2801   }
2802 
2803   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2804   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2805   *newmat = M;
2806 
2807   /* save submatrix used in processor for next request */
2808   if (call ==  MAT_INITIAL_MATRIX) {
2809     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2810     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2811   }
2812 
2813   PetscFunctionReturn(0);
2814 }
2815 
2816 EXTERN_C_BEGIN
2817 #undef __FUNCT__
2818 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2819 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2820 {
2821   PetscInt       m,cstart, cend,j,nnz,i,d;
2822   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2823   const PetscInt *JJ;
2824   PetscScalar    *values;
2825   PetscErrorCode ierr;
2826 
2827   PetscFunctionBegin;
2828   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2829 
2830   B->rmap.bs = B->cmap.bs = 1;
2831   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2832   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2833   m      = B->rmap.n;
2834   cstart = B->cmap.rstart;
2835   cend   = B->cmap.rend;
2836   rstart = B->rmap.rstart;
2837 
2838   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2839   o_nnz = d_nnz + m;
2840 
2841 #if defined(PETSC_USE_DEBUG)
2842   for (i=0; i<m; i++) {
2843     nnz     = Ii[i+1]- Ii[i];
2844     JJ      = J + Ii[i];
2845     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2846     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
2847     if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
2848     for (j=1; j<nnz; j++) {
2849       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
2850     }
2851   }
2852 #endif
2853 
2854   for (i=0; i<m; i++) {
2855     nnz     = Ii[i+1]- Ii[i];
2856     JJ      = J + Ii[i];
2857     nnz_max = PetscMax(nnz_max,nnz);
2858     for (j=0; j<nnz; j++) {
2859       if (*JJ >= cstart) break;
2860       JJ++;
2861     }
2862     d = 0;
2863     for (; j<nnz; j++) {
2864       if (*JJ++ >= cend) break;
2865       d++;
2866     }
2867     d_nnz[i] = d;
2868     o_nnz[i] = nnz - d;
2869   }
2870   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2871   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2872 
2873   if (v) values = (PetscScalar*)v;
2874   else {
2875     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2876     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2877   }
2878 
2879   for (i=0; i<m; i++) {
2880     ii   = i + rstart;
2881     nnz  = Ii[i+1]- Ii[i];
2882     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2883   }
2884   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2885   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2886 
2887   if (!v) {
2888     ierr = PetscFree(values);CHKERRQ(ierr);
2889   }
2890   PetscFunctionReturn(0);
2891 }
2892 EXTERN_C_END
2893 
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2896 /*@
2897    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2898    (the default parallel PETSc format).
2899 
2900    Collective on MPI_Comm
2901 
2902    Input Parameters:
2903 +  B - the matrix
2904 .  i - the indices into j for the start of each local row (starts with zero)
2905 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2906 -  v - optional values in the matrix
2907 
2908    Level: developer
2909 
2910    Notes:
2911        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
2912      thus you CANNOT change the matrix entries by changing the values of v[] after you have
2913      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2914 
2915        The i and j indices are 0 based, and the i indices are row offsets into the local j array.
2916 
2917        The format used for the sparse matrix input is equivalent to a
2918     row-major ordering, i.e., for the following matrix, the input data expected is
2919     as shown:
2920 
2921         1 0 0
2922         2 0 3     P0
2923        -------
2924         4 5 6     P1
2925 
2926      Process0 [P0]: rows_owned=[0,1]
2927         i =  {0,1,3}  [size = nrow+1  = 2+1]
2928         j =  {0,0,2}  [size = nz = 3]
2929         v =  {1,2,3}  [size = nz = 3]
2930 
2931      Process1 [P1]: rows_owned=[2]
2932         i =  {0,3}    [size = nrow+1  = 1+1]
2933         j =  {0,1,2}  [size = nz = 3]
2934         v =  {4,5,6}  [size = nz = 3]
2935 
2936       The column indices for each row MUST be sorted.
2937 
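       A minimal usage sketch, shown as executed by process 0 of the example above
    (each process passes its own local i, j, v arrays; Mat B, MPI_Comm comm, and
    PetscErrorCode ierr are assumed to be declared):

.vb
      PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
      PetscScalar v[] = {1.0,2.0,3.0};
      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,2,PETSC_DECIDE,PETSC_DETERMINE,3);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
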
2938 .keywords: matrix, aij, compressed row, sparse, parallel
2939 
2940 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2941           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2942 @*/
2943 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2944 {
2945   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2946 
2947   PetscFunctionBegin;
2948   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2949   if (f) {
2950     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2951   }
2952   PetscFunctionReturn(0);
2953 }
2954 
2955 #undef __FUNCT__
2956 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2957 /*@C
2958    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2959    (the default parallel PETSc format).  For good matrix assembly performance
2960    the user should preallocate the matrix storage by setting the parameters
2961    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2962    performance can be increased by more than a factor of 50.
2963 
2964    Collective on MPI_Comm
2965 
2966    Input Parameters:
2967 +  A - the matrix
2968 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2969            (same value is used for all local rows)
2970 .  d_nnz - array containing the number of nonzeros in the various rows of the
2971            DIAGONAL portion of the local submatrix (possibly different for each row)
2972            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2973            The size of this array is equal to the number of local rows, i.e 'm'.
2974            You must leave room for the diagonal entry even if it is zero.
2975 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2976            submatrix (same value is used for all local rows).
2977 -  o_nnz - array containing the number of nonzeros in the various rows of the
2978            OFF-DIAGONAL portion of the local submatrix (possibly different for
2979            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2980            structure. The size of this array is equal to the number
2981            of local rows, i.e 'm'.
2982 
2983    If the *_nnz parameter is given then the *_nz parameter is ignored
2984 
2985    The AIJ format (also called the Yale sparse matrix format or
2986    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2987    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2988 
2989    The parallel matrix is partitioned such that the first m0 rows belong to
2990    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2991    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2992 
2993    The DIAGONAL portion of the local submatrix of a processor can be defined
2994    as the submatrix which is obtained by extracting the part corresponding
2995    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2996    first row that belongs to the processor, and r2 is the last row belonging
2997    to this processor. This is a square mxm matrix. The remaining portion
2998    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2999 
3000    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3001 
3002    You can call MatGetInfo() to get information on how effective the preallocation was;
3003    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3004    You can also run with the option -info and look for messages with the string
3005    malloc in them to see if additional memory allocation was needed.
3006 
3007    Example usage:
3008 
3009    Consider the following 8x8 matrix with 34 non-zero values, that is
3010    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3011    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3012    as follows:
3013 
3014 .vb
3015             1  2  0  |  0  3  0  |  0  4
3016     Proc0   0  5  6  |  7  0  0  |  8  0
3017             9  0 10  | 11  0  0  | 12  0
3018     -------------------------------------
3019            13  0 14  | 15 16 17  |  0  0
3020     Proc1   0 18  0  | 19 20 21  |  0  0
3021             0  0  0  | 22 23  0  | 24  0
3022     -------------------------------------
3023     Proc2  25 26 27  |  0  0 28  | 29  0
3024            30  0  0  | 31 32 33  |  0 34
3025 .ve
3026 
3027    This can be represented as a collection of submatrices as:
3028 
3029 .vb
3030       A B C
3031       D E F
3032       G H I
3033 .ve
3034 
3035    Where the submatrices A,B,C are owned by proc0, D,E,F are
3036    owned by proc1, G,H,I are owned by proc2.
3037 
3038    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3039    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3040    The 'M','N' parameters are 8,8, and have the same values on all procs.
3041 
3042    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3043    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3044    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3045    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3046    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3047    matrix, and [DF] as another SeqAIJ matrix.
3048 
3049    When d_nz, o_nz parameters are specified, d_nz storage elements are
3050    allocated for every row of the local diagonal submatrix, and o_nz
3051    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3052    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3053    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3054    In this case, the values of d_nz,o_nz are:
3055 .vb
3056      proc0 : dnz = 2, o_nz = 2
3057      proc1 : dnz = 3, o_nz = 2
3058      proc2 : dnz = 1, o_nz = 4
3059 .ve
3060    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3061    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3062    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3063    34 values.
3064 
3065    When d_nnz, o_nnz parameters are specified, the storage is specified
3066    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3067    In the above case the values for d_nnz,o_nnz are:
3068 .vb
3069      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3070      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3071      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3072 .ve
3073    Here the space allocated is the sum of all the above values, i.e., 34, and
3074    hence the preallocation is perfect.
3075 
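   A minimal call sketch for the example above, as executed on proc1 using the
   d_nnz/o_nnz form (each process passes its own local sizes and arrays; Mat A,
   MPI_Comm comm, and PetscErrorCode ierr are assumed to be declared):

.vb
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
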
3076    Level: intermediate
3077 
3078 .keywords: matrix, aij, compressed row, sparse, parallel
3079 
3080 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3081           MPIAIJ, MatGetInfo()
3082 @*/
3083 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3084 {
3085   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3086 
3087   PetscFunctionBegin;
3088   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3089   if (f) {
3090     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3091   }
3092   PetscFunctionReturn(0);
3093 }
3094 
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3097 /*@
3098      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3099          CSR format the local rows.
3100 
3101    Collective on MPI_Comm
3102 
3103    Input Parameters:
3104 +  comm - MPI communicator
3105 .  m - number of local rows (Cannot be PETSC_DECIDE)
3106 .  n - This value should be the same as the local size used in creating the
3107        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3108        calculated if N is given) For square matrices n is almost always m.
3109 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3110 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3111 .   i - row indices
3112 .   j - column indices
3113 -   a - matrix values
3114 
3115    Output Parameter:
3116 .   mat - the matrix
3117 
3118    Level: intermediate
3119 
3120    Notes:
3121        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3122      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3123      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3124 
3125        The i and j indices are 0 based, and the i indices are row offsets into the local j array.
3126 
3127        The format used for the sparse matrix input is equivalent to a
3128     row-major ordering, i.e., for the following matrix, the input data expected is
3129     as shown:
3130 
3131         1 0 0
3132         2 0 3     P0
3133        -------
3134         4 5 6     P1
3135 
3136      Process0 [P0]: rows_owned=[0,1]
3137         i =  {0,1,3}  [size = nrow+1  = 2+1]
3138         j =  {0,0,2}  [size = nz = 3]
3139         v =  {1,2,3}  [size = nz = 3]
3140 
3141      Process1 [P1]: rows_owned=[2]
3142         i =  {0,3}    [size = nrow+1  = 1+1]
3143         j =  {0,1,2}  [size = nz = 3]
3144         v =  {4,5,6}  [size = nz = 3]
3145 
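       A minimal sketch of the corresponding call as executed by process 1 of the
    example above (each process passes only its own local rows; MPI_Comm comm and
    PetscErrorCode ierr are assumed to be declared):

.vb
      PetscInt    i[] = {0,3}, j[] = {0,1,2};
      PetscScalar v[] = {4.0,5.0,6.0};
      Mat         A;
      ierr = MatCreateMPIAIJWithArrays(comm,1,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve
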
3146 .keywords: matrix, aij, compressed row, sparse, parallel
3147 
3148 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3149           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3150 @*/
3151 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3152 {
3153   PetscErrorCode ierr;
3154 
3155   PetscFunctionBegin;
3156   if (i[0]) {
3157     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3158   }
3159   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3160   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3161   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3162   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3163   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3164   PetscFunctionReturn(0);
3165 }
3166 
3167 #undef __FUNCT__
3168 #define __FUNCT__ "MatCreateMPIAIJ"
3169 /*@C
3170    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3171    (the default parallel PETSc format).  For good matrix assembly performance
3172    the user should preallocate the matrix storage by setting the parameters
3173    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3174    performance can be increased by more than a factor of 50.
3175 
3176    Collective on MPI_Comm
3177 
3178    Input Parameters:
3179 +  comm - MPI communicator
3180 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3181            This value should be the same as the local size used in creating the
3182            y vector for the matrix-vector product y = Ax.
3183 .  n - This value should be the same as the local size used in creating the
3184        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3185        calculated if N is given) For square matrices n is almost always m.
3186 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3187 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3188 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3189            (same value is used for all local rows)
3190 .  d_nnz - array containing the number of nonzeros in the various rows of the
3191            DIAGONAL portion of the local submatrix (possibly different for each row)
3192            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3193            The size of this array is equal to the number of local rows, i.e 'm'.
3194            You must leave room for the diagonal entry even if it is zero.
3195 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3196            submatrix (same value is used for all local rows).
3197 -  o_nnz - array containing the number of nonzeros in the various rows of the
3198            OFF-DIAGONAL portion of the local submatrix (possibly different for
3199            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3200            structure. The size of this array is equal to the number
3201            of local rows, i.e 'm'.
3202 
3203    Output Parameter:
3204 .  A - the matrix
3205 
3206    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3207    MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
3208    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3209    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3210 
3211    Notes:
3212    If the *_nnz parameter is given then the *_nz parameter is ignored
3213 
3214    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3215    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3216    storage requirements for this matrix.
3217 
3218    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
3219    processor then it must be used on all processors that share the object for
3220    that argument.
3221 
3222    The user MUST specify either the local or global matrix dimensions
3223    (possibly both).
3224 
3225    The parallel matrix is partitioned across processors such that the
3226    first m0 rows belong to process 0, the next m1 rows belong to
3227    process 1, the next m2 rows belong to process 2 etc.. where
3228    m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores
3229    values corresponding to an [m x N] submatrix.
3230 
3231    The columns are logically partitioned with the n0 columns belonging
3232    to 0th partition, the next n1 columns belonging to the next
3233    partition etc.. where n0,n1,n2... are the input parameter 'n'.
3234 
3235    The DIAGONAL portion of the local submatrix on any given processor
3236    is the submatrix corresponding to the rows and columns m,n
3237    corresponding to the given processor. i.e diagonal matrix on
3238    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3239    etc. The remaining portion of the local submatrix [m x (N-n)]
3240    constitute the OFF-DIAGONAL portion. The example below better
3241    illustrates this concept.
3242 
3243    For a square global matrix we define each processor's diagonal portion
3244    to be its local rows and the corresponding columns (a square submatrix);
3245    each processor's off-diagonal portion encompasses the remainder of the
3246    local matrix (a rectangular submatrix).
3247 
3248    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3249 
3250    When calling this routine with a single process communicator, a matrix of
3251    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3252    type of communicator, use the construction mechanism:
3253      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3254 
3255    By default, this format uses inodes (identical nodes) when possible.
3256    We search for consecutive rows with the same nonzero structure, thereby
3257    reusing matrix information to achieve increased efficiency.
3258 
3259    Options Database Keys:
3260 +  -mat_no_inode  - Do not use inodes
3261 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3262 -  -mat_aij_oneindex - Internally use indexing starting at 1
3263         rather than 0.  Note that when calling MatSetValues(),
3264         the user still MUST index entries starting at 0!
3265 
3266 
3267    Example usage:
3268 
3269    Consider the following 8x8 matrix with 34 non-zero values, that is
3270    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3271    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3272    as follows:
3273 
3274 .vb
3275             1  2  0  |  0  3  0  |  0  4
3276     Proc0   0  5  6  |  7  0  0  |  8  0
3277             9  0 10  | 11  0  0  | 12  0
3278     -------------------------------------
3279            13  0 14  | 15 16 17  |  0  0
3280     Proc1   0 18  0  | 19 20 21  |  0  0
3281             0  0  0  | 22 23  0  | 24  0
3282     -------------------------------------
3283     Proc2  25 26 27  |  0  0 28  | 29  0
3284            30  0  0  | 31 32 33  |  0 34
3285 .ve
3286 
3287    This can be represented as a collection of submatrices as:
3288 
3289 .vb
3290       A B C
3291       D E F
3292       G H I
3293 .ve
3294 
3295    Where the submatrices A,B,C are owned by proc0, D,E,F are
3296    owned by proc1, G,H,I are owned by proc2.
3297 
3298    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3299    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3300    The 'M','N' parameters are 8,8, and have the same values on all procs.
3301 
3302    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3303    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3304    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3305    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3306    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3307    matrix, and [DF] as another SeqAIJ matrix.
3308 
3309    When d_nz, o_nz parameters are specified, d_nz storage elements are
3310    allocated for every row of the local diagonal submatrix, and o_nz
3311    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3312    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3313    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3314    In this case, the values of d_nz,o_nz are:
3315 .vb
3316      proc0 : dnz = 2, o_nz = 2
3317      proc1 : dnz = 3, o_nz = 2
3318      proc2 : dnz = 1, o_nz = 4
3319 .ve
3320    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3321    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3322    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3323    34 values.
3324 
3325    When d_nnz, o_nnz parameters are specified, the storage is specified
3326    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3327    In the above case the values for d_nnz,o_nnz are:
3328 .vb
3329      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3330      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3331      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3332 .ve
3333    Here the space allocated is the sum of all the above values, i.e., 34, and
3334    hence the preallocation is perfect.
3335 
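   A minimal call sketch for the example above, as executed on proc0 using the
   d_nz/o_nz form (each process passes its own local sizes; Mat A, MPI_Comm comm,
   and PetscErrorCode ierr are assumed to be declared):

.vb
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
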
3336    Level: intermediate
3337 
3338 .keywords: matrix, aij, compressed row, sparse, parallel
3339 
3340 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3341           MPIAIJ, MatCreateMPIAIJWithArrays()
3342 @*/
3343 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3344 {
3345   PetscErrorCode ierr;
3346   PetscMPIInt    size;
3347 
3348   PetscFunctionBegin;
3349   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3350   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3351   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3352   if (size > 1) {
3353     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3354     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3355   } else {
3356     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3357     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3358   }
3359   PetscFunctionReturn(0);
3360 }
3361 
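/*
   MatMPIAIJGetSeqAIJ - gives direct access (no copies are made) to the diagonal block,
   the off-diagonal block, and the array mapping local off-diagonal columns to global
   column numbers of an MPIAIJ matrix.

   A minimal usage sketch (A is an assembled MPIAIJ matrix; the returned objects are
   internal references and must not be destroyed by the caller):

      Mat      Ad,Ao;
      PetscInt *garray;
      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
*/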
3362 #undef __FUNCT__
3363 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3364 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3365 {
3366   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3367 
3368   PetscFunctionBegin;
3369   *Ad     = a->A;
3370   *Ao     = a->B;
3371   *colmap = a->garray;
3372   PetscFunctionReturn(0);
3373 }
3374 
3375 #undef __FUNCT__
3376 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3377 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3378 {
3379   PetscErrorCode ierr;
3380   PetscInt       i;
3381   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3382 
3383   PetscFunctionBegin;
3384   if (coloring->ctype == IS_COLORING_GLOBAL) {
3385     ISColoringValue *allcolors,*colors;
3386     ISColoring      ocoloring;
3387 
3388     /* set coloring for diagonal portion */
3389     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3390 
3391     /* set coloring for off-diagonal portion */
3392     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3393     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3394     for (i=0; i<a->B->cmap.n; i++) {
3395       colors[i] = allcolors[a->garray[i]];
3396     }
3397     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3398     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3399     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3400     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3401   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3402     ISColoringValue *colors;
3403     PetscInt        *larray;
3404     ISColoring      ocoloring;
3405 
3406     /* set coloring for diagonal portion */
3407     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3408     for (i=0; i<a->A->cmap.n; i++) {
3409       larray[i] = i + A->cmap.rstart;
3410     }
3411     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3412     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3413     for (i=0; i<a->A->cmap.n; i++) {
3414       colors[i] = coloring->colors[larray[i]];
3415     }
3416     ierr = PetscFree(larray);CHKERRQ(ierr);
3417     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3418     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3419     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3420 
3421     /* set coloring for off-diagonal portion */
3422     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3423     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3424     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3425     for (i=0; i<a->B->cmap.n; i++) {
3426       colors[i] = coloring->colors[larray[i]];
3427     }
3428     ierr = PetscFree(larray);CHKERRQ(ierr);
3429     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3430     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3431     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3432   } else {
3433     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3434   }
3435 
3436   PetscFunctionReturn(0);
3437 }
3438 
3439 #if defined(PETSC_HAVE_ADIC)
3440 #undef __FUNCT__
3441 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3442 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3443 {
3444   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3445   PetscErrorCode ierr;
3446 
3447   PetscFunctionBegin;
3448   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3449   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3450   PetscFunctionReturn(0);
3451 }
3452 #endif
3453 
3454 #undef __FUNCT__
3455 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3456 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3457 {
3458   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3459   PetscErrorCode ierr;
3460 
3461   PetscFunctionBegin;
3462   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3463   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3464   PetscFunctionReturn(0);
3465 }
3466 
3467 #undef __FUNCT__
3468 #define __FUNCT__ "MatMerge"
3469 /*@
3470       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3471                  matrices from each processor
3472 
3473     Collective on MPI_Comm
3474 
3475    Input Parameters:
3476 +    comm - the communicator the parallel matrix will live on
3477 .    inmat - the input sequential matrix on each process
3478 .    n - number of local columns (or PETSC_DECIDE)
3479 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3480 
3481    Output Parameter:
3482 .    outmat - the parallel matrix generated
3483 
3484     Level: advanced
3485 
3486    Notes: The number of columns of the matrix on EACH processor MUST be the same. The input matrix inmat is destroyed by this routine.
3487 
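   A minimal usage sketch, assuming each process holds a SeqAIJ matrix seqA with the
   same number of columns (MPI_Comm comm and PetscErrorCode ierr are assumed declared;
   note that seqA is destroyed inside the call):

.vb
      Mat parA;
      ierr = MatMerge(comm,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&parA);CHKERRQ(ierr);
.ve
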
3488 @*/
3489 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3490 {
3491   PetscErrorCode ierr;
3492   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3493   PetscInt       *indx;
3494   PetscScalar    *values;
3495 
3496   PetscFunctionBegin;
3497   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3498   if (scall == MAT_INITIAL_MATRIX){
3499     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3500     if (n == PETSC_DECIDE){
3501       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3502     }
3503     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3504     rstart -= m;
3505 
3506     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3507     for (i=0;i<m;i++) {
3508       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3509       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3510       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3511     }
3512     /* This routine will ONLY return MPIAIJ type matrix */
3513     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3514     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3515     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3516     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3517     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3518 
3519   } else if (scall == MAT_REUSE_MATRIX){
3520     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3521   } else {
3522     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3523   }
3524 
3525   for (i=0;i<m;i++) {
3526     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3527     Ii    = i + rstart;
3528     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3529     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3530   }
3531   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3532   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3533   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3534 
3535   PetscFunctionReturn(0);
3536 }
3537 
3538 #undef __FUNCT__
3539 #define __FUNCT__ "MatFileSplit"
3540 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3541 {
3542   PetscErrorCode    ierr;
3543   PetscMPIInt       rank;
3544   PetscInt          m,N,i,rstart,nnz;
3545   size_t            len;
3546   const PetscInt    *indx;
3547   PetscViewer       out;
3548   char              *name;
3549   Mat               B;
3550   const PetscScalar *values;
3551 
3552   PetscFunctionBegin;
3553   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3554   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3555   /* Should this be the type of the diagonal block of A? */
3556   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3557   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3558   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3559   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3560   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3561   for (i=0;i<m;i++) {
3562     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3563     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3564     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3565   }
3566   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3567   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3568 
3569   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3570   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3571   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3572   sprintf(name,"%s.%d",outfile,rank);
3573   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3574   ierr = PetscFree(name);CHKERRQ(ierr);
3575   ierr = MatView(B,out);CHKERRQ(ierr);
3576   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3577   ierr = MatDestroy(B);CHKERRQ(ierr);
3578   PetscFunctionReturn(0);
3579 }
3580 
3581 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3582 #undef __FUNCT__
3583 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3584 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3585 {
3586   PetscErrorCode       ierr;
3587   Mat_Merge_SeqsToMPI  *merge;
3588   PetscContainer       container;
3589 
3590   PetscFunctionBegin;
3591   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3592   if (container) {
3593     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3594     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3595     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3596     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3597     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3598     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3599     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3600     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3601     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3602     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3603     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3604     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3605 
3606     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3607     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3608     ierr = PetscFree(merge);CHKERRQ(ierr);
3609   }
3610 
3611   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3612   PetscFunctionReturn(0);
3613 }
3614 
3615 #include "src/mat/utils/freespace.h"
3616 #include "petscbt.h"
3617 
3618 #undef __FUNCT__
3619 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3620 /*@C
3621       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3622                  matrices from each processor
3623 
3624     Collective on MPI_Comm
3625 
3626    Input Parameters:
3627 +    comm - the communicator the parallel matrix will live on
3628 .    seqmat - the input sequential matrix on each process
3629 .    m - number of local rows (or PETSC_DECIDE)
3630 .    n - number of local columns (or PETSC_DECIDE)
3631 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3632 
3633    Output Parameter:
3634 .    mpimat - the parallel matrix generated
3635 
3636     Level: advanced
3637 
3638    Notes:
3639      The dimensions of the sequential matrix in each processor MUST be the same.
3640      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3641      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
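
   A minimal usage sketch (m and n may be PETSC_DECIDE; MPI_Comm comm, the per-process
   SeqAIJ matrix seqA, and PetscErrorCode ierr are assumed to be declared):

.vb
      Mat mpiA;
      ierr = MatMerge_SeqsToMPI(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
.ve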
3642 @*/
3643 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3644 {
3645   PetscErrorCode       ierr;
3646   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3647   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3648   PetscMPIInt          size,rank,taga,*len_s;
3649   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3650   PetscInt             proc,m;
3651   PetscInt             **buf_ri,**buf_rj;
3652   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3653   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3654   MPI_Request          *s_waits,*r_waits;
3655   MPI_Status           *status;
3656   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3657   Mat_Merge_SeqsToMPI  *merge;
3658   PetscContainer       container;
3659 
3660   PetscFunctionBegin;
3661   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3662 
3663   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3664   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3665 
3666   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3667   if (container) {
3668     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3669   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must have been created by MatMerge_SeqsToMPISymbolic()");
3670   bi     = merge->bi;
3671   bj     = merge->bj;
3672   buf_ri = merge->buf_ri;
3673   buf_rj = merge->buf_rj;
3674 
3675   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3676   owners = merge->rowmap.range;
3677   len_s  = merge->len_s;
3678 
3679   /* send and recv matrix values */
3680   /*-----------------------------*/
3681   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3682   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3683 
3684   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3685   for (proc=0,k=0; proc<size; proc++){
3686     if (!len_s[proc]) continue;
3687     i = owners[proc];
3688     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3689     k++;
3690   }
3691 
3692   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3693   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3694   ierr = PetscFree(status);CHKERRQ(ierr);
3695 
3696   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3697   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3698 
3699   /* insert mat values of mpimat */
3700   /*----------------------------*/
3701   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3702   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3703   nextrow = buf_ri_k + merge->nrecv;
3704   nextai  = nextrow + merge->nrecv;
3705 
3706   for (k=0; k<merge->nrecv; k++){
3707     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3708     nrows = *(buf_ri_k[k]);
3709     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3710     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
3711   }
3712 
3713   /* set values of ba */
3714   m = merge->rowmap.n;
3715   for (i=0; i<m; i++) {
3716     arow = owners[rank] + i;
3717     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3718     bnzi = bi[i+1] - bi[i];
3719     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3720 
3721     /* add local non-zero vals of this proc's seqmat into ba */
3722     anzi = ai[arow+1] - ai[arow];
3723     aj   = a->j + ai[arow];
3724     aa   = a->a + ai[arow];
3725     nextaj = 0;
3726     for (j=0; nextaj<anzi; j++){
3727       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3728         ba_i[j] += aa[nextaj++];
3729       }
3730     }
3731 
3732     /* add received vals into ba */
3733     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3734       /* i-th row */
3735       if (i == *nextrow[k]) {
3736         anzi = *(nextai[k]+1) - *nextai[k];
3737         aj   = buf_rj[k] + *(nextai[k]);
3738         aa   = abuf_r[k] + *(nextai[k]);
3739         nextaj = 0;
3740         for (j=0; nextaj<anzi; j++){
3741           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3742             ba_i[j] += aa[nextaj++];
3743           }
3744         }
3745         nextrow[k]++; nextai[k]++;
3746       }
3747     }
3748     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3749   }
3750   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3751   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3752 
3753   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3754   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3755   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3756   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3762 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3763 {
3764   PetscErrorCode       ierr;
3765   Mat                  B_mpi;
3766   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3767   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3768   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3769   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3770   PetscInt             len,proc,*dnz,*onz;
3771   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3772   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3773   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3774   MPI_Status           *status;
3775   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3776   PetscBT              lnkbt;
3777   Mat_Merge_SeqsToMPI  *merge;
3778   PetscContainer       container;
3779 
3780   PetscFunctionBegin;
3781   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3782 
3783   /* make sure it is a PETSc comm */
3784   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3785   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3786   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3787 
3788   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3789   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3790 
3791   /* determine row ownership */
3792   /*---------------------------------------------------------*/
3793   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3794   merge->rowmap.n = m;
3795   merge->rowmap.N = M;
3796   merge->rowmap.bs = 1;
3797   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3798   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3799   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3800 
3801   m      = merge->rowmap.n;
3802   M      = merge->rowmap.N;
3803   owners = merge->rowmap.range;
3804 
3805   /* determine the number of messages to send, their lengths */
3806   /*---------------------------------------------------------*/
3807   len_s  = merge->len_s;
3808 
3809   len = 0;  /* length of buf_si[] */
3810   merge->nsend = 0;
3811   for (proc=0; proc<size; proc++){
3812     len_si[proc] = 0;
3813     if (proc == rank){
3814       len_s[proc] = 0;
3815     } else {
3816       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3817       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3818     }
3819     if (len_s[proc]) {
3820       merge->nsend++;
3821       nrows = 0;
3822       for (i=owners[proc]; i<owners[proc+1]; i++){
3823         if (ai[i+1] > ai[i]) nrows++;
3824       }
3825       len_si[proc] = 2*(nrows+1);
3826       len += len_si[proc];
3827     }
3828   }
3829 
3830   /* determine the number and length of messages to receive for ij-structure */
3831   /*-------------------------------------------------------------------------*/
3832   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3833   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3834 
3835   /* post the Irecv of j-structure */
3836   /*-------------------------------*/
3837   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3838   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3839 
3840   /* post the Isend of j-structure */
3841   /*--------------------------------*/
3842   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3843   sj_waits = si_waits + merge->nsend;
3844 
3845   for (proc=0, k=0; proc<size; proc++){
3846     if (!len_s[proc]) continue;
3847     i = owners[proc];
3848     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3849     k++;
3850   }
3851 
3852   /* receives and sends of j-structure are complete */
3853   /*------------------------------------------------*/
3854   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3855   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3856 
3857   /* send and recv i-structure */
3858   /*---------------------------*/
3859   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3860   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3861 
3862   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3863   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3864   for (proc=0,k=0; proc<size; proc++){
3865     if (!len_s[proc]) continue;
3866     /* form outgoing message for i-structure:
3867          buf_si[0]:                 nrows to be sent
3868                [1:nrows]:           row index (global)
3869                [nrows+1:2*nrows+1]: i-structure index
3870     */
3871     /*-------------------------------------------*/
3872     nrows = len_si[proc]/2 - 1;
3873     buf_si_i    = buf_si + nrows+1;
3874     buf_si[0]   = nrows;
3875     buf_si_i[0] = 0;
3876     nrows = 0;
3877     for (i=owners[proc]; i<owners[proc+1]; i++){
3878       anzi = ai[i+1] - ai[i];
3879       if (anzi) {
3880         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3881         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3882         nrows++;
3883       }
3884     }
3885     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3886     k++;
3887     buf_si += len_si[proc];
3888   }
3889 
3890   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3891   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3892 
3893   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3894   for (i=0; i<merge->nrecv; i++){
3895     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3896   }
3897 
3898   ierr = PetscFree(len_si);CHKERRQ(ierr);
3899   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3900   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3901   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3902   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3903   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3904   ierr = PetscFree(status);CHKERRQ(ierr);
3905 
3906   /* compute a local seq matrix in each processor */
3907   /*----------------------------------------------*/
3908   /* allocate bi array and free space for accumulating nonzero column info */
3909   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3910   bi[0] = 0;
3911 
3912   /* create and initialize a linked list */
3913   nlnk = N+1;
3914   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3915 
3916   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3917   len = 0;
3918   len  = ai[owners[rank+1]] - ai[owners[rank]];
3919   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3920   current_space = free_space;
3921 
3922   /* determine symbolic info for each local row */
3923   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3924   nextrow = buf_ri_k + merge->nrecv;
3925   nextai  = nextrow + merge->nrecv;
3926   for (k=0; k<merge->nrecv; k++){
3927     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3928     nrows = *buf_ri_k[k];
3929     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3930     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
3931   }
3932 
3933   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3934   len = 0;
3935   for (i=0;i<m;i++) {
3936     bnzi   = 0;
3937     /* add local non-zero cols of this proc's seqmat into lnk */
3938     arow   = owners[rank] + i;
3939     anzi   = ai[arow+1] - ai[arow];
3940     aj     = a->j + ai[arow];
3941     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3942     bnzi += nlnk;
3943     /* add received col data into lnk */
3944     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3945       if (i == *nextrow[k]) { /* i-th row */
3946         anzi = *(nextai[k]+1) - *nextai[k];
3947         aj   = buf_rj[k] + *nextai[k];
3948         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3949         bnzi += nlnk;
3950         nextrow[k]++; nextai[k]++;
3951       }
3952     }
3953     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3954 
3955     /* if free space is not available, make more free space */
3956     if (current_space->local_remaining<bnzi) {
3957       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3958       nspacedouble++;
3959     }
3960     /* copy data into free space, then initialize lnk */
3961     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3962     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3963 
3964     current_space->array           += bnzi;
3965     current_space->local_used      += bnzi;
3966     current_space->local_remaining -= bnzi;
3967 
3968     bi[i+1] = bi[i] + bnzi;
3969   }
3970 
3971   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3972 
3973   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3974   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3975   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3976 
3977   /* create symbolic parallel matrix B_mpi */
3978   /*---------------------------------------*/
3979   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3980   if (n==PETSC_DECIDE) {
3981     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3982   } else {
3983     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3984   }
3985   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3986   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3987   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3988 
3989   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3990   B_mpi->assembled     = PETSC_FALSE;
3991   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3992   merge->bi            = bi;
3993   merge->bj            = bj;
3994   merge->buf_ri        = buf_ri;
3995   merge->buf_rj        = buf_rj;
3996   merge->coi           = PETSC_NULL;
3997   merge->coj           = PETSC_NULL;
3998   merge->owners_co     = PETSC_NULL;
3999 
4000   /* attach the supporting struct to B_mpi for reuse */
4001   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4002   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4003   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4004   *mpimat = B_mpi;
4005 
4006   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4007   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4008   PetscFunctionReturn(0);
4009 }
4010 
4011 #undef __FUNCT__
4012 #define __FUNCT__ "MatMerge_SeqsToMPI"
4013 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4014 {
4015   PetscErrorCode   ierr;
4016 
4017   PetscFunctionBegin;
4018   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4019   if (scall == MAT_INITIAL_MATRIX){
4020     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4021   }
4022   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4023   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4024   PetscFunctionReturn(0);
4025 }
4026 
4027 #undef __FUNCT__
4028 #define __FUNCT__ "MatGetLocalMat"
4029 /*@
4030      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4031 
4032     Not Collective
4033 
4034    Input Parameters:
4035 +    A - the matrix
4036 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4037 
4038    Output Parameter:
4039 .    A_loc - the local sequential matrix generated
4040 
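   A minimal usage sketch (A is an assembled MPIAIJ matrix; PetscErrorCode ierr is
   assumed to be declared):

.vb
      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ...  use A_loc as an ordinary SeqAIJ matrix  ...
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve
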
4041     Level: developer
4042 
4043 @*/
4044 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4045 {
4046   PetscErrorCode  ierr;
4047   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4048   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4049   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4050   PetscScalar     *aa=a->a,*ba=b->a,*ca;
4051   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
4052   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4053 
4054   PetscFunctionBegin;
4055   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4056   if (scall == MAT_INITIAL_MATRIX){
4057     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4058     ci[0] = 0;
4059     for (i=0; i<am; i++){
4060       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4061     }
4062     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4063     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4064     k = 0;
4065     for (i=0; i<am; i++) {
4066       ncols_o = bi[i+1] - bi[i];
4067       ncols_d = ai[i+1] - ai[i];
4068       /* off-diagonal portion of A: columns left of the diagonal block */
4069       for (jo=0; jo<ncols_o; jo++) {
4070         col = cmap[*bj];
4071         if (col >= cstart) break;
4072         cj[k]   = col; bj++;
4073         ca[k++] = *ba++;
4074       }
4075       /* diagonal portion of A */
4076       for (j=0; j<ncols_d; j++) {
4077         cj[k]   = cstart + *aj++;
4078         ca[k++] = *aa++;
4079       }
4080       /* off-diagonal portion of A: columns right of the diagonal block */
4081       for (j=jo; j<ncols_o; j++) {
4082         cj[k]   = cmap[*bj++];
4083         ca[k++] = *ba++;
4084       }
4085     }
4086     /* put together the new matrix */
4087     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4088     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4089     /* Since these are PETSc arrays, change flags to free them as necessary. */
4090     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4091     mat->free_a  = PETSC_TRUE;
4092     mat->free_ij = PETSC_TRUE;
4093     mat->nonew   = 0;
4094   } else if (scall == MAT_REUSE_MATRIX){
4095     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4096     ci = mat->i; cj = mat->j; ca = mat->a;
4097     for (i=0; i<am; i++) {
4098       /* off-diagonal portion of A: columns left of the diagonal block */
4099       ncols_o = bi[i+1] - bi[i];
4100       for (jo=0; jo<ncols_o; jo++) {
4101         col = cmap[*bj];
4102         if (col >= cstart) break;
4103         *ca++ = *ba++; bj++;
4104       }
4105       /* diagonal portion of A */
4106       ncols_d = ai[i+1] - ai[i];
4107       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4108       /* off-diagonal portion of A: columns right of the diagonal block */
4109       for (j=jo; j<ncols_o; j++) {
4110         *ca++ = *ba++; bj++;
4111       }
4112     }
4113   } else {
4114     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4115   }
4116 
4117   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4118   PetscFunctionReturn(0);
4119 }
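/*
   Example usage (a minimal sketch; A is assumed to be an assembled MPIAIJ matrix):

      Mat Aloc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);    after A's numerical values change
      ierr = MatDestroy(Aloc);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call only recopies the numerical values, so the nonzero pattern
   of A must not have changed between the two calls.
*/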
4120 
4121 #undef __FUNCT__
4122 #define __FUNCT__ "MatGetLocalMatCondensed"
4123 /*@C
4124      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of the parallel matrix A, keeping only its NON-ZERO columns
4125 
4126     Not Collective
4127 
4128    Input Parameters:
4129 +    A - the matrix
4130 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4131 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4132 
4133    Output Parameter:
4134 .    A_loc - the local sequential matrix generated
4135 
4136     Level: developer
4137 
4138 @*/
4139 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4140 {
4141   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4142   PetscErrorCode    ierr;
4143   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4144   IS                isrowa,iscola;
4145   Mat               *aloc;
4146 
4147   PetscFunctionBegin;
4148   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4149   if (!row){
4150     start = A->rmap.rstart; end = A->rmap.rend;
4151     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4152   } else {
4153     isrowa = *row;
4154   }
4155   if (!col){
4156     start = A->cmap.rstart;
4157     cmap  = a->garray;
4158     nzA   = a->A->cmap.n;
4159     nzB   = a->B->cmap.n;
4160     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4161     ncols = 0;
4162     for (i=0; i<nzB; i++) {
4163       if (cmap[i] < start) idx[ncols++] = cmap[i];
4164       else break;
4165     }
4166     imark = i;
4167     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4168     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4169     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4170     ierr = PetscFree(idx);CHKERRQ(ierr);
4171   } else {
4172     iscola = *col;
4173   }
4174   if (scall != MAT_INITIAL_MATRIX){
4175     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4176     aloc[0] = *A_loc;
4177   }
4178   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4179   *A_loc = aloc[0];
4180   ierr = PetscFree(aloc);CHKERRQ(ierr);
4181   if (!row){
4182     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4183   }
4184   if (!col){
4185     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4186   }
4187   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4188   PetscFunctionReturn(0);
4189 }
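/*
   Example usage (a minimal sketch; passing PETSC_NULL for row and col lets the routine
   build the index sets from A's local rows and nonzero columns itself):

      Mat Aloc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
      ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/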
4190 
4191 #undef __FUNCT__
4192 #define __FUNCT__ "MatGetBrowsOfAcols"
4193 /*@C
4194     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4195 
4196     Collective on Mat
4197 
4198    Input Parameters:
4199 +    A,B - the matrices in mpiaij format
4200 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4201 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4202 
4203    Output Parameter:
4204 +    rowb, colb - index sets of rows and columns of B to extract
4205 .    brstart - row index in B_seq at which the B->rmap.n rows taken from B's local rows begin
4206 -    B_seq - the sequential matrix generated
4207 
4208     Level: developer
4209 
4210 @*/
4211 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4212 {
4213   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4214   PetscErrorCode    ierr;
4215   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4216   IS                isrowb,iscolb;
4217   Mat               *bseq;
4218 
4219   PetscFunctionBegin;
4220   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4221     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4222   }
4223   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4224 
4225   if (scall == MAT_INITIAL_MATRIX){
4226     start = A->cmap.rstart;
4227     cmap  = a->garray;
4228     nzA   = a->A->cmap.n;
4229     nzB   = a->B->cmap.n;
4230     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4231     ncols = 0;
4232     for (i=0; i<nzB; i++) {  /* row < local row index */
4233       if (cmap[i] < start) idx[ncols++] = cmap[i];
4234       else break;
4235     }
4236     imark = i;
4237     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4238     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4239     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4240     ierr = PetscFree(idx);CHKERRQ(ierr);
4241     *brstart = imark;
4242     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4243   } else {
4244     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4245     isrowb = *rowb; iscolb = *colb;
4246     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4247     bseq[0] = *B_seq;
4248   }
4249   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4250   *B_seq = bseq[0];
4251   ierr = PetscFree(bseq);CHKERRQ(ierr);
4252   if (!rowb){
4253     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4254   } else {
4255     *rowb = isrowb;
4256   }
4257   if (!colb){
4258     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4259   } else {
4260     *colb = iscolb;
4261   }
4262   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4263   PetscFunctionReturn(0);
4264 }
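/*
   Example usage (a minimal sketch; A and B are assumed to be MPIAIJ matrices with
   compatible layouts). The index sets created on the first call are saved so that a
   later MAT_REUSE_MATRIX call can extract the same rows again:

      IS       rowb = PETSC_NULL,colb = PETSC_NULL;
      PetscInt brstart;
      Mat      Bseq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(Bseq);CHKERRQ(ierr);
*/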
4265 
4266 #undef __FUNCT__
4267 #define __FUNCT__ "MatGetBrowsOfAoCols"
4268 /*@C
4269     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4270     of the OFF-DIAGONAL portion of the local part of A
4271 
4272     Collective on Mat
4273 
4274    Input Parameters:
4275 +    A,B - the matrices in mpiaij format
4276 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4277 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4278 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4279 
4280    Output Parameter:
4281 .    B_oth - the sequential matrix generated
4282 
4283     Level: developer
4284 
4285 @*/
4286 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4287 {
4288   VecScatter_MPI_General *gen_to,*gen_from;
4289   PetscErrorCode         ierr;
4290   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4291   Mat_SeqAIJ             *b_oth;
4292   VecScatter             ctx=a->Mvctx;
4293   MPI_Comm               comm=((PetscObject)ctx)->comm;
4294   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4295   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4296   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4297   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4298   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4299   MPI_Status             *sstatus,rstatus;
4300   PetscMPIInt            jj;
4301   PetscInt               *cols,sbs,rbs;
4302   PetscScalar            *vals;
4303 
4304   PetscFunctionBegin;
4305   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4306     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4307   }
4308   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4309   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4310 
4311   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4312   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4313   rvalues  = gen_from->values; /* used as a buffer for the lengths of the rows to be received */
4314   svalues  = gen_to->values;   /* used as a buffer for the lengths of the rows to be sent */
4315   nrecvs   = gen_from->n;
4316   nsends   = gen_to->n;
4317 
4318   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4319   srow     = gen_to->indices;   /* local row index to be sent */
4320   sstarts  = gen_to->starts;
4321   sprocs   = gen_to->procs;
4322   sstatus  = gen_to->sstatus;
4323   sbs      = gen_to->bs;
4324   rstarts  = gen_from->starts;
4325   rprocs   = gen_from->procs;
4326   rbs      = gen_from->bs;
4327 
4328   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4329   if (scall == MAT_INITIAL_MATRIX){
4330     /* i-array */
4331     /*---------*/
4332     /*  post receives */
4333     for (i=0; i<nrecvs; i++){
4334       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4335       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4336       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4337     }
4338 
4339     /* pack the outgoing message */
4340     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4341     rstartsj = sstartsj + nsends +1;
4342     sstartsj[0] = 0;  rstartsj[0] = 0;
4343     len = 0; /* total length of j or a array to be sent */
4344     k = 0;
4345     for (i=0; i<nsends; i++){
4346       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4347       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4348       for (j=0; j<nrows; j++) {
4349         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4350         for (l=0; l<sbs; l++){
4351           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4352           rowlen[j*sbs+l] = ncols;
4353           len += ncols;
4354           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4355         }
4356         k++;
4357       }
4358       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4359       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4360     }
4361     /* recvs and sends of i-array are completed */
4362     i = nrecvs;
4363     while (i--) {
4364       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4365     }
4366     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4367 
4368     /* allocate buffers for sending j and a arrays */
4369     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4370     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4371 
4372     /* create i-array of B_oth */
4373     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4374     b_othi[0] = 0;
4375     len = 0; /* total length of j or a array to be received */
4376     k = 0;
4377     for (i=0; i<nrecvs; i++){
4378       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4379       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4380       for (j=0; j<nrows; j++) {
4381         b_othi[k+1] = b_othi[k] + rowlen[j];
4382         len += rowlen[j]; k++;
4383       }
4384       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4385     }
4386 
4387     /* allocate space for j and a arrays of B_oth */
4388     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4389     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4390 
4391     /* j-array */
4392     /*---------*/
4393     /*  post receives of j-array */
4394     for (i=0; i<nrecvs; i++){
4395       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4396       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4397     }
4398 
4399     /* pack the outgoing message j-array */
4400     k = 0;
4401     for (i=0; i<nsends; i++){
4402       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4403       bufJ = bufj+sstartsj[i];
4404       for (j=0; j<nrows; j++) {
4405         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4406         for (ll=0; ll<sbs; ll++){
4407           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4408           for (l=0; l<ncols; l++){
4409             *bufJ++ = cols[l];
4410           }
4411           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4412         }
4413       }
4414       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4415     }
4416 
4417     /* recvs and sends of j-array are completed */
4418     i = nrecvs;
4419     while (i--) {
4420       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4421     }
4422     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4423   } else if (scall == MAT_REUSE_MATRIX){
4424     sstartsj = *startsj;
4425     rstartsj = sstartsj + nsends +1;
4426     bufa     = *bufa_ptr;
4427     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4428     b_otha   = b_oth->a;
4429   } else {
4430     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4431   }
4432 
4433   /* a-array */
4434   /*---------*/
4435   /*  post receives of a-array */
4436   for (i=0; i<nrecvs; i++){
4437     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4438     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4439   }
4440 
4441   /* pack the outgoing message a-array */
4442   k = 0;
4443   for (i=0; i<nsends; i++){
4444     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4445     bufA = bufa+sstartsj[i];
4446     for (j=0; j<nrows; j++) {
4447       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4448       for (ll=0; ll<sbs; ll++){
4449         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4450         for (l=0; l<ncols; l++){
4451           *bufA++ = vals[l];
4452         }
4453         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4454       }
4455     }
4456     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4457   }
4458   /* recvs and sends of a-array are completed */
4459   i = nrecvs;
4460   while (i--) {
4461     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4462   }
4463   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4464   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4465 
4466   if (scall == MAT_INITIAL_MATRIX){
4467     /* put together the new matrix */
4468     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4469 
4470     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4471     /* Since these are PETSc arrays, change flags to free them as necessary. */
4472     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4473     b_oth->free_a  = PETSC_TRUE;
4474     b_oth->free_ij = PETSC_TRUE;
4475     b_oth->nonew   = 0;
4476 
4477     ierr = PetscFree(bufj);CHKERRQ(ierr);
4478     if (!startsj || !bufa_ptr){
4479       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4480       ierr = PetscFree(bufa);CHKERRQ(ierr); /* caller did not ask for the send buffer, so free it here */
4481     } else {
4482       *startsj  = sstartsj;
4483       *bufa_ptr = bufa;
4484     }
4485   }
4486   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4487   PetscFunctionReturn(0);
4488 }
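/*
   Example usage (a minimal sketch; A and B are assumed to be MPIAIJ matrices whose
   column and row layouts match). Saving startsj and the send buffer lets the
   communication pattern be reused when only B's numerical values change:

      PetscInt    *startsj = PETSC_NULL;
      PetscScalar *bufa    = PETSC_NULL;
      Mat         Both;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
      ierr = PetscFree(startsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(Both);CHKERRQ(ierr);
*/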
4489 
4490 #undef __FUNCT__
4491 #define __FUNCT__ "MatGetCommunicationStructs"
4492 /*@C
4493   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4494 
4495   Not Collective
4496 
4497   Input Parameters:
4498 . A - The matrix in mpiaij format
4499 
4500   Output Parameter:
4501 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4502 . colmap - A map from global column index to local index into lvec
4503 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4504 
4505   Level: developer
4506 
4507 @*/
4508 #if defined (PETSC_USE_CTABLE)
4509 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4510 #else
4511 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4512 #endif
4513 {
4514   Mat_MPIAIJ *a;
4515 
4516   PetscFunctionBegin;
4517   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4518   PetscValidPointer(lvec, 2);
4519   PetscValidPointer(colmap, 3);
4520   PetscValidPointer(multScatter, 4);
4521   a = (Mat_MPIAIJ *) A->data;
4522   if (lvec) *lvec = a->lvec;
4523   if (colmap) *colmap = a->colmap;
4524   if (multScatter) *multScatter = a->Mvctx;
4525   PetscFunctionReturn(0);
4526 }
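/*
   Example usage (a minimal sketch; all three output pointers must be valid because the
   routine validates them, and the type of colmap depends on PETSC_USE_CTABLE):

      Vec        lvec;
      VecScatter sc;
   #if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
   #else
      PetscInt   *colmap;
   #endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sc);CHKERRQ(ierr);

   The returned objects are owned by the matrix and must not be destroyed by the caller.
*/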
4527 
4528 EXTERN_C_BEGIN
4529 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4530 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4531 EXTERN_C_END
4532 
4533 /*MC
4534    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4535 
4536    Options Database Keys:
4537 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4538 
4539   Level: beginner
4540 
4541 .seealso: MatCreateMPIAIJ()
4542 M*/
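/*
   Example (a minimal sketch of the usual creation sequence for this type; the sizes and
   preallocation values are placeholders chosen by the caller):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);

   Alternatively, MatSetFromOptions() together with -mat_type mpiaij selects this type at
   run time.
*/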
4543 
4544 EXTERN_C_BEGIN
4545 #undef __FUNCT__
4546 #define __FUNCT__ "MatCreate_MPIAIJ"
4547 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4548 {
4549   Mat_MPIAIJ     *b;
4550   PetscErrorCode ierr;
4551   PetscMPIInt    size;
4552 
4553   PetscFunctionBegin;
4554   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4555 
4556   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4557   B->data         = (void*)b;
4558   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4559   B->factor       = 0;
4560   B->rmap.bs      = 1;
4561   B->assembled    = PETSC_FALSE;
4562   B->mapping      = 0;
4563 
4564   B->insertmode      = NOT_SET_VALUES;
4565   b->size            = size;
4566   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4567 
4568   /* build cache for off array entries formed */
4569   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4570   b->donotstash  = PETSC_FALSE;
4571   b->colmap      = 0;
4572   b->garray      = 0;
4573   b->roworiented = PETSC_TRUE;
4574 
4575   /* stuff used for matrix vector multiply */
4576   b->lvec      = PETSC_NULL;
4577   b->Mvctx     = PETSC_NULL;
4578 
4579   /* stuff for MatGetRow() */
4580   b->rowindices   = 0;
4581   b->rowvalues    = 0;
4582   b->getrowactive = PETSC_FALSE;
4583 
4584 
4585   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4586                                      "MatStoreValues_MPIAIJ",
4587                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4588   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4589                                      "MatRetrieveValues_MPIAIJ",
4590                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4591   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4592 				     "MatGetDiagonalBlock_MPIAIJ",
4593                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4595 				     "MatIsTranspose_MPIAIJ",
4596 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4597   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4598 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4599 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4600   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4601 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4602 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4604 				     "MatDiagonalScaleLocal_MPIAIJ",
4605 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4607                                      "MatConvert_MPIAIJ_MPICSRPERM",
4608                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4610                                      "MatConvert_MPIAIJ_MPICRL",
4611                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4612   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4613   PetscFunctionReturn(0);
4614 }
4615 EXTERN_C_END
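/*
   The PetscObjectComposeFunctionDynamic() calls above publish the type-specific
   implementations under string names; a minimal sketch of how such a composed function
   is looked up and invoked (this is what the type-independent interface routines do):

      PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
      ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
      if (f) {
        ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
      }
*/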
4616 
4617 #undef __FUNCT__
4618 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4619 /*@
4620      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
4621          and "off-diagonal" part of the matrix in CSR format.
4622 
4623    Collective on MPI_Comm
4624 
4625    Input Parameters:
4626 +  comm - MPI communicator
4627 .  m - number of local rows (Cannot be PETSC_DECIDE)
4628 .  n - number of local columns; this value should be the same as the local size used in creating the
4629        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4630        calculated if N is given). For square matrices n is almost always m.
4631 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4632 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4633 .   i - row indices for "diagonal" portion of matrix
4634 .   j - column indices
4635 .   a - matrix values
4636 .   oi - row indices for "off-diagonal" portion of matrix
4637 .   oj - column indices
4638 -   oa - matrix values
4639 
4640    Output Parameter:
4641 .   mat - the matrix
4642 
4643    Level: advanced
4644 
4645    Notes:
4646        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4647 
4648        The i and j indices are 0 based
4649 
4650        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4651 
4652 
4653 .keywords: matrix, aij, compressed row, sparse, parallel
4654 
4655 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4656           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4657 @*/
4658 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4659 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4660 {
4661   PetscErrorCode ierr;
4662   Mat_MPIAIJ     *maij;
4663 
4664   PetscFunctionBegin;
4665   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4666   if (i[0]) {
4667     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4668   }
4669   if (oi[0]) {
4670     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4671   }
4672   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4673   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4674   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4675   maij = (Mat_MPIAIJ*) (*mat)->data;
4676   maij->donotstash     = PETSC_TRUE;
4677   (*mat)->preallocated = PETSC_TRUE;
4678 
4679   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4680   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4681   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4682 
4683   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4684   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4685 
4686   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4687   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4688   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4689   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4690 
4691   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4692   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4693   PetscFunctionReturn(0);
4694 }
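/*
   Example usage (a minimal sketch; the CSR arrays i,j,a for the "diagonal" block and
   oi,oj,oa for the "off-diagonal" block are assumed to be built by the caller and, since
   they are not copied, must remain allocated for the life of the matrix):

      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ierr = MatDestroy(A);CHKERRQ(ierr);
*/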
4695 
4696 /*
4697     Special version for direct calls from Fortran
4698 */
4699 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4700 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4701 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4702 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4703 #endif
4704 
4705 /* Change these macros so can be used in void function */
4706 #undef CHKERRQ
4707 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4708 #undef SETERRQ2
4709 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4710 #undef SETERRQ
4711 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4712 
4713 EXTERN_C_BEGIN
4714 #undef __FUNCT__
4715 #define __FUNCT__ "matsetvaluesmpiaij_"
4716 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4717 {
4718   Mat            mat = *mmat;
4719   PetscInt       m = *mm, n = *mn;
4720   InsertMode     addv = *maddv;
4721   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4722   PetscScalar    value;
4723   PetscErrorCode ierr;
4724 
4725   MatPreallocated(mat);
4726   if (mat->insertmode == NOT_SET_VALUES) {
4727     mat->insertmode = addv;
4728   }
4729 #if defined(PETSC_USE_DEBUG)
4730   else if (mat->insertmode != addv) {
4731     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4732   }
4733 #endif
4734   {
4735   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4736   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4737   PetscTruth     roworiented = aij->roworiented;
4738 
4739   /* Some Variables required in the macro */
4740   Mat            A = aij->A;
4741   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4742   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4743   PetscScalar    *aa = a->a;
4744   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4745   Mat            B = aij->B;
4746   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4747   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4748   PetscScalar    *ba = b->a;
4749 
4750   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4751   PetscInt       nonew = a->nonew;
4752   PetscScalar    *ap1,*ap2;
4753 
4754   PetscFunctionBegin;
4755   for (i=0; i<m; i++) {
4756     if (im[i] < 0) continue;
4757 #if defined(PETSC_USE_DEBUG)
4758     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4759 #endif
4760     if (im[i] >= rstart && im[i] < rend) {
4761       row      = im[i] - rstart;
4762       lastcol1 = -1;
4763       rp1      = aj + ai[row];
4764       ap1      = aa + ai[row];
4765       rmax1    = aimax[row];
4766       nrow1    = ailen[row];
4767       low1     = 0;
4768       high1    = nrow1;
4769       lastcol2 = -1;
4770       rp2      = bj + bi[row];
4771       ap2      = ba + bi[row];
4772       rmax2    = bimax[row];
4773       nrow2    = bilen[row];
4774       low2     = 0;
4775       high2    = nrow2;
4776 
4777       for (j=0; j<n; j++) {
4778         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4779         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4780         if (in[j] >= cstart && in[j] < cend){
4781           col = in[j] - cstart;
4782           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4783         } else if (in[j] < 0) continue;
4784 #if defined(PETSC_USE_DEBUG)
4785         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4786 #endif
4787         else {
4788           if (mat->was_assembled) {
4789             if (!aij->colmap) {
4790               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4791             }
4792 #if defined (PETSC_USE_CTABLE)
4793             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4794 	    col--;
4795 #else
4796             col = aij->colmap[in[j]] - 1;
4797 #endif
4798             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4799               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4800               col =  in[j];
4801               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4802               B = aij->B;
4803               b = (Mat_SeqAIJ*)B->data;
4804               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4805               rp2      = bj + bi[row];
4806               ap2      = ba + bi[row];
4807               rmax2    = bimax[row];
4808               nrow2    = bilen[row];
4809               low2     = 0;
4810               high2    = nrow2;
4811               bm       = aij->B->rmap.n;
4812               ba = b->a;
4813             }
4814           } else col = in[j];
4815           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4816         }
4817       }
4818     } else {
4819       if (!aij->donotstash) {
4820         if (roworiented) {
4821           if (ignorezeroentries && v[i*n] == 0.0) continue;
4822           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4823         } else {
4824           if (ignorezeroentries && v[i] == 0.0) continue;
4825           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4826         }
4827       }
4828     }
4829   }}
4830   PetscFunctionReturnVoid();
4831 }
4832 EXTERN_C_END
4833 
4834