/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a77337e4d7d5143cd577a9cca2c72dcc9694336d) */
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
10 at a slightly higher hash-table lookup cost; without it, it is not scalable
11 (each processor holds an order-N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
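
/*
   Illustrative sketch (not part of the build): once the colmap above exists, a
   global column index gcol that lives in the off-diagonal part can be translated
   to a local column index of B.  The +1/-1 shift is used because a lookup result
   of 0 means "not found".  The names gcol and lcol are hypothetical.

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                  // a result of -1 means gcol is not present in B
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/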
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
56       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}		\
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81     for (_i=low2; _i<high2; _i++) {		\
82       if (rp2[_i] > col) break;			\
83       if (rp2[_i] == col) {			      \
84 	if (addv == ADD_VALUES) ap2[_i] += value;     \
85 	else                    ap2[_i] = value;      \
86 	goto b_noinsert;			      \
87       }						      \
88     }							      \
89     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
90     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}		\
91     if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93     N = nrow2++ - 1; b->nz++; high2++;					\
94     /* shift up all the later entries in this row */			\
95     for (ii=N; ii>=_i; ii--) {						\
96       rp2[ii+1] = rp2[ii];						\
97       ap2[ii+1] = ap2[ii];						\
98     }									\
99     rp2[_i] = col;							\
100     ap2[_i] = value;							\
101     b_noinsert: ;								\
102     bilen[row] = nrow2;							\
103 }
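
/*
   Stripped of the AIJ bookkeeping and reallocation, the two macros above perform
   an insertion into one sorted row.  A minimal sketch of the same pattern
   (illustrative only), assuming a sorted column array rp[] with matching values
   ap[], current length nrow with spare capacity, and a new entry (col,value):

     PetscInt low = 0, high = nrow, t, i, ii;
     while (high - low > 5) {                  // binary search narrows the window
       t = (low + high)/2;
       if (rp[t] > col) high = t; else low = t;
     }
     for (i=low; i<high; i++) {                // short linear scan of the window
       if (rp[i] >= col) break;
     }
     if (i < nrow && rp[i] == col) {           // existing entry: accumulate
       ap[i] += value;
     } else {                                  // new entry: shift the tail up by one
       for (ii=nrow-1; ii>=i; ii--) { rp[ii+1] = rp[ii]; ap[ii+1] = ap[ii]; }
       rp[i] = col; ap[i] = value; nrow++;
     }
*/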
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
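
/*
   The three PetscMemcpy() calls above rely on the layout of one local row when
   the caller supplies the values v[] ordered by global column number: the row is
   split between the off-diagonal block B (columns to the left of the diagonal
   block), the diagonal block A, and B again (columns to the right), i.e.

     v = [ B-left (l entries) | A (a->i[row+1]-a->i[row] entries) | B-right ]

   so the copies pick these three slices out of v in order.
*/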
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   MatScalar      *aa = a->a;
149   PetscTruth     ignorezeroentries = a->ignorezeroentries;
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   MatScalar      *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   MatScalar      *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
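
/*
   Typical use from application code (illustrative sketch only; error checking
   omitted).  Values destined for rows this process does not own are stashed by
   the routine above and communicated during MatAssemblyBegin/End().  The row and
   column numbers below are hypothetical.

     PetscInt    row = 5, cols[2] = {5,7};
     PetscScalar vals[2] = {2.0,-1.0};
     MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/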
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col --;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are in the same mode: either INSERT_VALUES or ADD_VALUES */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
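
/*
   Why MPI_BOR works for the consistency check in MatAssemblyBegin_MPIAIJ() above
   (a sketch, assuming INSERT_VALUES and ADD_VALUES occupy distinct bits of the
   InsertMode enum): the bitwise OR of the insert modes over all ranks equals
   (ADD_VALUES|INSERT_VALUES) exactly when at least one rank inserted and at least
   one rank added, e.g.

     rank 0: insertmode = INSERT_VALUES
     rank 1: insertmode = ADD_VALUES
     MPI_BOR over all ranks = ADD_VALUES|INSERT_VALUES   -> error is raised
*/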
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312 {
313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
315   PetscErrorCode ierr;
316   PetscMPIInt    n;
317   PetscInt       i,j,rstart,ncols,flg;
318   PetscInt       *row,*col;
319   PetscTruth     other_disassembled;
320   PetscScalar    *val;
321   InsertMode     addv = mat->insertmode;
322 
323   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324   PetscFunctionBegin;
325   if (!aij->donotstash) {
326     while (1) {
327       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
328       if (!flg) break;
329 
330       for (i=0; i<n;) {
331         /* Now identify the consecutive vals belonging to the same row */
332         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
333         if (j < n) ncols = j-i;
334         else       ncols = n-i;
335         /* Now assemble all these values with a single function call */
336         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
337         i = j;
338       }
339     }
340     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
341   }
342   a->compressedrow.use     = PETSC_FALSE;
343   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
345 
346   /* determine if any processor has disassembled; if so we must
347      also disassemble ourselves, in order that we may reassemble. */
348   /*
349      if the nonzero structure of submatrix B cannot change then we know that
350      no processor disassembled, thus we can skip this stuff
351   */
352   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
353     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
354     if (mat->was_assembled && !other_disassembled) {
355       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
356     }
357   }
358   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
359     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
360   }
361   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
362   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
363   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
364   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
365 
366   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
367   aij->rowvalues = 0;
368 
369   /* used by MatAXPY() */
370   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
371   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
372 
373   PetscFunctionReturn(0);
374 }
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
379 {
380   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
381   PetscErrorCode ierr;
382 
383   PetscFunctionBegin;
384   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
385   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatZeroRows_MPIAIJ"
391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
392 {
393   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
394   PetscErrorCode ierr;
395   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
396   PetscInt       i,*owners = A->rmap.range;
397   PetscInt       *nprocs,j,idx,nsends,row;
398   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
399   PetscInt       *rvalues,count,base,slen,*source;
400   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
401   MPI_Comm       comm = ((PetscObject)A)->comm;
402   MPI_Request    *send_waits,*recv_waits;
403   MPI_Status     recv_status,*send_status;
404 #if defined(PETSC_DEBUG)
405   PetscTruth     found = PETSC_FALSE;
406 #endif
407 
408   PetscFunctionBegin;
409   /*  first count number of contributors to each processor */
410   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
411   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
412   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
413   j = 0;
414   for (i=0; i<N; i++) {
415     if (lastidx > (idx = rows[i])) j = 0;
416     lastidx = idx;
417     for (; j<size; j++) {
418       if (idx >= owners[j] && idx < owners[j+1]) {
419         nprocs[2*j]++;
420         nprocs[2*j+1] = 1;
421         owner[i] = j;
422 #if defined(PETSC_DEBUG)
423         found = PETSC_TRUE;
424 #endif
425         break;
426       }
427     }
428 #if defined(PETSC_DEBUG)
429     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
430     found = PETSC_FALSE;
431 #endif
432   }
433   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434 
435   /* inform other processors of number of messages and max length*/
436   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
437 
438   /* post receives:   */
439   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
440   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
441   for (i=0; i<nrecvs; i++) {
442     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
443   }
444 
445   /* do sends:
446       1) starts[i] gives the starting index in svalues for stuff going to
447          the ith processor
448   */
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
450   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
451   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
452   starts[0] = 0;
453   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
454   for (i=0; i<N; i++) {
455     svalues[starts[owner[i]]++] = rows[i];
456   }
457 
458   starts[0] = 0;
459   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
460   count = 0;
461   for (i=0; i<size; i++) {
462     if (nprocs[2*i+1]) {
463       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
464     }
465   }
466   ierr = PetscFree(starts);CHKERRQ(ierr);
467 
468   base = owners[rank];
469 
470   /*  wait on receives */
471   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
472   source = lens + nrecvs;
473   count  = nrecvs; slen = 0;
474   while (count) {
475     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
476     /* unpack receives into our local space */
477     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
478     source[imdex]  = recv_status.MPI_SOURCE;
479     lens[imdex]    = n;
480     slen          += n;
481     count--;
482   }
483   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
484 
485   /* move the data into the send scatter */
486   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
487   count = 0;
488   for (i=0; i<nrecvs; i++) {
489     values = rvalues + i*nmax;
490     for (j=0; j<lens[i]; j++) {
491       lrows[count++] = values[j] - base;
492     }
493   }
494   ierr = PetscFree(rvalues);CHKERRQ(ierr);
495   ierr = PetscFree(lens);CHKERRQ(ierr);
496   ierr = PetscFree(owner);CHKERRQ(ierr);
497   ierr = PetscFree(nprocs);CHKERRQ(ierr);
498 
499   /* actually zap the local rows */
500   /*
501         Zero the required rows. If the "diagonal block" of the matrix
502      is square and the user wishes to set the diagonal, we use separate
503      code so that MatSetValues() is not called for each diagonal entry,
504      which would allocate new memory with many mallocs and slow things down.
505 
506        Contributed by: Matthew Knepley
507   */
508   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
509   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
510   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
511     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
512   } else if (diag != 0.0) {
513     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
514     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
515       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
516 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
517     }
518     for (i = 0; i < slen; i++) {
519       row  = lrows[i] + rstart;
520       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
521     }
522     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
524   } else {
525     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
526   }
527   ierr = PetscFree(lrows);CHKERRQ(ierr);
528 
529   /* wait on sends */
530   if (nsends) {
531     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
532     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
533     ierr = PetscFree(send_status);CHKERRQ(ierr);
534   }
535   ierr = PetscFree(send_waits);CHKERRQ(ierr);
536   ierr = PetscFree(svalues);CHKERRQ(ierr);
537 
538   PetscFunctionReturn(0);
539 }
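
/*
   Illustrative usage (sketch only): each process passes the global indices of the
   rows it wants zeroed; the routine above routes every index to the process that
   owns that row before zeroing.  The indices below are hypothetical.

     PetscInt rows[2] = {0,10};                          // global row numbers
     ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr);     // zero the rows, put 1.0 on the diagonal
*/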
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMult_MPIAIJ"
543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
544 {
545   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
546   PetscErrorCode ierr;
547   PetscInt       nt;
548 
549   PetscFunctionBegin;
550   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
551   if (nt != A->cmap.n) {
552     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
553   }
554   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
555   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
556   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
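
/*
   The multiply above computes

     yy = A_d*x_local + B_o*x_ghost

   where A_d is the local diagonal block, B_o the local off-diagonal block, and
   x_ghost (a->lvec) holds the needed off-process entries of xx.  The scatter that
   fills a->lvec is begun before and ended after the local product A_d*x_local, so
   the communication can overlap with that computation.
*/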
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultAdd_MPIAIJ"
563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567 
568   PetscFunctionBegin;
569   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
570   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
571   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
572   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
579 {
580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
581   PetscErrorCode ierr;
582   PetscTruth     merged;
583 
584   PetscFunctionBegin;
585   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
586   /* do nondiagonal part */
587   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
588   if (!merged) {
589     /* send it on its way */
590     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
591     /* do local part */
592     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
593     /* receive remote parts: note this assumes the values are not actually */
594     /* added into yy until the VecScatterEnd() on the next line */
595     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
596   } else {
597     /* do local part */
598     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
599     /* send it on its way */
600     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
601     /* values were actually received in the Begin(), but we still need to call the End(), which is a no-op here */
602     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
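
/*
   The transpose multiply above computes yy = A_d^T*x_local locally, while the
   contributions B_o^T*x_local belong to rows owned by other processes and are
   therefore scattered back (SCATTER_REVERSE with ADD_VALUES) and accumulated
   into their copies of yy.  The merged and non-merged branches differ only in
   whether the scatter can be overlapped with the local product.
*/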
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
611 {
612   MPI_Comm       comm;
613   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
614   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
615   IS             Me,Notme;
616   PetscErrorCode ierr;
617   PetscInt       M,N,first,last,*notme,i;
618   PetscMPIInt    size;
619 
620   PetscFunctionBegin;
621 
622   /* Easy test: the diagonal block of B must be the transpose of the diagonal block of A */
623   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
624   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
625   if (!*f) PetscFunctionReturn(0);
626   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
628   if (size == 1) PetscFunctionReturn(0);
629 
630   /* Hard test: the off-diagonal blocks. This requires a MatGetSubMatrices() call. */
631   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
632   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
633   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
634   for (i=0; i<first; i++) notme[i] = i;
635   for (i=last; i<M; i++) notme[i-last+first] = i;
636   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
637   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
638   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
639   Aoff = Aoffs[0];
640   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
641   Boff = Boffs[0];
642   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
644   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
645   ierr = ISDestroy(Me);CHKERRQ(ierr);
646   ierr = ISDestroy(Notme);CHKERRQ(ierr);
647 
648   PetscFunctionReturn(0);
649 }
650 EXTERN_C_END
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
655 {
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657   PetscErrorCode ierr;
658 
659   PetscFunctionBegin;
660   /* do nondiagonal part */
661   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
662   /* send it on its way */
663   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
664   /* do local part */
665   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
666   /* receive remote parts */
667   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672   This only works correctly for square matrices where the subblock A->A is the
673    diagonal block
674 */
675 #undef __FUNCT__
676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
678 {
679   PetscErrorCode ierr;
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681 
682   PetscFunctionBegin;
683   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
684   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
685     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
686   }
687   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatScale_MPIAIJ"
693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
694 {
695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
700   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatDestroy_MPIAIJ"
706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
707 {
708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
709   PetscErrorCode ierr;
710 
711   PetscFunctionBegin;
712 #if defined(PETSC_USE_LOG)
713   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
714 #endif
715   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
717   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
718 #if defined (PETSC_USE_CTABLE)
719   if (aij->colmap) {ierr = PetscTableDestroy(aij->colmap);CHKERRQ(ierr);}
720 #else
721   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
722 #endif
723   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
724   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
725   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
726   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
727   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
728   ierr = PetscFree(aij);CHKERRQ(ierr);
729 
730   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
737   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatView_MPIAIJ_Binary"
743 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
744 {
745   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
746   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
747   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
750   int               fd;
751   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
752   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
753   PetscScalar       *column_values;
754 
755   PetscFunctionBegin;
756   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
757   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
758   nz   = A->nz + B->nz;
759   if (!rank) {
760     header[0] = MAT_FILE_COOKIE;
761     header[1] = mat->rmap.N;
762     header[2] = mat->cmap.N;
763     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
764     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
765     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
766     /* get largest number of rows any processor has */
767     rlen = mat->rmap.n;
768     range = mat->rmap.range;
769     for (i=1; i<size; i++) {
770       rlen = PetscMax(rlen,range[i+1] - range[i]);
771     }
772   } else {
773     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
774     rlen = mat->rmap.n;
775   }
776 
777   /* load up the local row counts */
778   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
779   for (i=0; i<mat->rmap.n; i++) {
780     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
781   }
782 
783   /* store the row lengths to the file */
784   if (!rank) {
785     MPI_Status status;
786     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
787     for (i=1; i<size; i++) {
788       rlen = range[i+1] - range[i];
789       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
790       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
791     }
792   } else {
793     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
794   }
795   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
796 
797   /* load up the local column indices */
798   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
799   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
800   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
801   cnt  = 0;
802   for (i=0; i<mat->rmap.n; i++) {
803     for (j=B->i[i]; j<B->i[i+1]; j++) {
804       if ( (col = garray[B->j[j]]) > cstart) break;
805       column_indices[cnt++] = col;
806     }
807     for (k=A->i[i]; k<A->i[i+1]; k++) {
808       column_indices[cnt++] = A->j[k] + cstart;
809     }
810     for (; j<B->i[i+1]; j++) {
811       column_indices[cnt++] = garray[B->j[j]];
812     }
813   }
814   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
815 
816   /* store the column indices to the file */
817   if (!rank) {
818     MPI_Status status;
819     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
820     for (i=1; i<size; i++) {
821       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
822       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
823       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
824       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
825     }
826   } else {
827     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
828     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
829   }
830   ierr = PetscFree(column_indices);CHKERRQ(ierr);
831 
832   /* load up the local column values */
833   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
834   cnt  = 0;
835   for (i=0; i<mat->rmap.n; i++) {
836     for (j=B->i[i]; j<B->i[i+1]; j++) {
837       if ( garray[B->j[j]] > cstart) break;
838       column_values[cnt++] = B->a[j];
839     }
840     for (k=A->i[i]; k<A->i[i+1]; k++) {
841       column_values[cnt++] = A->a[k];
842     }
843     for (; j<B->i[i+1]; j++) {
844       column_values[cnt++] = B->a[j];
845     }
846   }
847   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
848 
849   /* store the column values to the file */
850   if (!rank) {
851     MPI_Status status;
852     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
853     for (i=1; i<size; i++) {
854       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
855       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
856       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
857       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
858     }
859   } else {
860     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
861     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
862   }
863   ierr = PetscFree(column_values);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
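
/*
   Layout of the binary file produced above, in order (a sketch; integers are
   written as PETSC_INT, values as PETSC_SCALAR):

     header[4]            MAT_FILE_COOKIE, global rows M, global cols N, total nonzeros
     row_lengths[M]       number of nonzeros in each global row
     column_indices[nz]   global column indices, row by row, increasing within a row
     column_values[nz]    the numerical values in the same order

   Process 0 writes its own piece and then receives the others one process at a
   time, so it only ever needs one receive buffer of the largest per-process size.
*/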
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
869 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
870 {
871   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
872   PetscErrorCode    ierr;
873   PetscMPIInt       rank = aij->rank,size = aij->size;
874   PetscTruth        isdraw,iascii,isbinary;
875   PetscViewer       sviewer;
876   PetscViewerFormat format;
877 
878   PetscFunctionBegin;
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
881   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
882   if (iascii) {
883     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
884     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
885       MatInfo    info;
886       PetscTruth inodes;
887 
888       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
889       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
890       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
891       if (!inodes) {
892         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
893 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
894       } else {
895         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
896 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
897       }
898       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
900       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
901       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
902       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
903       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
904       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
905       PetscFunctionReturn(0);
906     } else if (format == PETSC_VIEWER_ASCII_INFO) {
907       PetscInt   inodecount,inodelimit,*inodes;
908       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
909       if (inodes) {
910         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
911       } else {
912         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
913       }
914       PetscFunctionReturn(0);
915     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
916       PetscFunctionReturn(0);
917     }
918   } else if (isbinary) {
919     if (size == 1) {
920       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
921       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
922     } else {
923       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
924     }
925     PetscFunctionReturn(0);
926   } else if (isdraw) {
927     PetscDraw  draw;
928     PetscTruth isnull;
929     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
930     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
931   }
932 
933   if (size == 1) {
934     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
935     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
936   } else {
937     /* assemble the entire matrix onto first processor. */
938     Mat         A;
939     Mat_SeqAIJ  *Aloc;
940     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
941     PetscScalar *a;
942 
943     if (mat->rmap.N > 1024) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead");
944 
945     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
946     if (!rank) {
947       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
948     } else {
949       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
950     }
951     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
952     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
953     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
954     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
955 
956     /* copy over the A part */
957     Aloc = (Mat_SeqAIJ*)aij->A->data;
958     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
959     row = mat->rmap.rstart;
960     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
961     for (i=0; i<m; i++) {
962       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
963       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
964     }
965     aj = Aloc->j;
966     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
967 
968     /* copy over the B part */
969     Aloc = (Mat_SeqAIJ*)aij->B->data;
970     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
971     row  = mat->rmap.rstart;
972     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
973     ct   = cols;
974     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
975     for (i=0; i<m; i++) {
976       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
977       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
978     }
979     ierr = PetscFree(ct);CHKERRQ(ierr);
980     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
981     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
982     /*
983        Everyone has to participate in this call to draw the matrix since the graphics waits are
984        synchronized across all processors that share the PetscDraw object
985     */
986     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     if (!rank) {
988       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
989       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
990     }
991     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
992     ierr = MatDestroy(A);CHKERRQ(ierr);
993   }
994   PetscFunctionReturn(0);
995 }
996 
997 #undef __FUNCT__
998 #define __FUNCT__ "MatView_MPIAIJ"
999 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1000 {
1001   PetscErrorCode ierr;
1002   PetscTruth     iascii,isdraw,issocket,isbinary;
1003 
1004   PetscFunctionBegin;
1005   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1006   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1007   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1008   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1009   if (iascii || isdraw || isbinary || issocket) {
1010     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1011   } else {
1012     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1013   }
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatRelax_MPIAIJ"
1019 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1020 {
1021   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1022   PetscErrorCode ierr;
1023   Vec            bb1;
1024 
1025   PetscFunctionBegin;
1026   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1027 
1028   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1029     if (flag & SOR_ZERO_INITIAL_GUESS) {
1030       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1031       its--;
1032     }
1033 
1034     while (its--) {
1035       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1036       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1037 
1038       /* update rhs: bb1 = bb - B*x */
1039       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1040       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1041 
1042       /* local sweep */
1043       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1060     }
1061   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1062     if (flag & SOR_ZERO_INITIAL_GUESS) {
1063       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1064       its--;
1065     }
1066     while (its--) {
1067       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1068       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1069 
1070       /* update rhs: bb1 = bb - B*x */
1071       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1072       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1073 
1074       /* local sweep */
1075       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1076     }
1077   } else {
1078     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1079   }
1080 
1081   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
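
/*
   The local relaxation above is processor-block Jacobi with SOR inside each block:
   every outer iteration gathers the current ghost values of xx into mat->lvec,
   corrects the right-hand side as

     bb1 = bb - B_o*x_ghost

   and then applies the requested local SOR sweep(s) to the diagonal block A_d
   with bb1 as the right-hand side.
*/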
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatPermute_MPIAIJ"
1087 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1088 {
1089   MPI_Comm       comm,pcomm;
1090   PetscInt       first,local_size,nrows,*rows;
1091   int            ntids;
1092   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1093   PetscErrorCode ierr;
1094 
1095   PetscFunctionBegin;
1096   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1097   /* make a collective version of 'rowp' */
1098   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1099   if (pcomm==comm) {
1100     crowp = rowp;
1101   } else {
1102     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1103     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1104     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1105     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1106   }
1107   /* collect the global row permutation and invert it */
1108   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1109   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1110   if (pcomm!=comm) {
1111     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1112   }
1113   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1114   /* get the local target indices */
1115   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1116   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1117   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1118   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1119   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1121   /* the column permutation is so much easier;
1122      make a local version of 'colp' and invert it */
1123   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1124   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1125   if (ntids==1) {
1126     lcolp = colp;
1127   } else {
1128     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1129     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1130     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1131   }
1132   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1133   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1134   if (ntids>1) {
1135     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1136     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1137   }
1138   /* now we just get the submatrix */
1139   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1140   /* clean up */
1141   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1142   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1148 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1149 {
1150   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1151   Mat            A = mat->A,B = mat->B;
1152   PetscErrorCode ierr;
1153   PetscReal      isend[5],irecv[5];
1154 
1155   PetscFunctionBegin;
1156   info->block_size     = 1.0;
1157   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1158   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1159   isend[3] = info->memory;  isend[4] = info->mallocs;
1160   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1161   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1162   isend[3] += info->memory;  isend[4] += info->mallocs;
1163   if (flag == MAT_LOCAL) {
1164     info->nz_used      = isend[0];
1165     info->nz_allocated = isend[1];
1166     info->nz_unneeded  = isend[2];
1167     info->memory       = isend[3];
1168     info->mallocs      = isend[4];
1169   } else if (flag == MAT_GLOBAL_MAX) {
1170     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1171     info->nz_used      = irecv[0];
1172     info->nz_allocated = irecv[1];
1173     info->nz_unneeded  = irecv[2];
1174     info->memory       = irecv[3];
1175     info->mallocs      = irecv[4];
1176   } else if (flag == MAT_GLOBAL_SUM) {
1177     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1178     info->nz_used      = irecv[0];
1179     info->nz_allocated = irecv[1];
1180     info->nz_unneeded  = irecv[2];
1181     info->memory       = irecv[3];
1182     info->mallocs      = irecv[4];
1183   }
1184   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1185   info->fill_ratio_needed = 0;
1186   info->factor_mallocs    = 0;
1187   info->rows_global       = (double)matin->rmap.N;
1188   info->columns_global    = (double)matin->cmap.N;
1189   info->rows_local        = (double)matin->rmap.n;
1190   info->columns_local     = (double)matin->cmap.N;
1191 
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatSetOption_MPIAIJ"
1197 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscTruth flg)
1198 {
1199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   switch (op) {
1204   case MAT_NEW_NONZERO_LOCATIONS:
1205   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1206   case MAT_KEEP_ZEROED_ROWS:
1207   case MAT_NEW_NONZERO_LOCATION_ERR:
1208   case MAT_USE_INODES:
1209   case MAT_IGNORE_ZERO_ENTRIES:
1210     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1211     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1212     break;
1213   case MAT_ROW_ORIENTED:
1214     a->roworiented = flg;
1215     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1216     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1217     break;
1218   case MAT_NEW_DIAGONALS:
1219     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1220     break;
1221   case MAT_IGNORE_OFF_PROC_ENTRIES:
1222     a->donotstash = PETSC_TRUE;
1223     break;
1224   case MAT_SYMMETRIC:
1225     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1226     break;
1227   case MAT_STRUCTURALLY_SYMMETRIC:
1228   case MAT_HERMITIAN:
1229   case MAT_SYMMETRY_ETERNAL:
1230     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1231     break;
1232   default:
1233     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1234   }
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatGetRow_MPIAIJ"
1240 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1241 {
1242   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1243   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1244   PetscErrorCode ierr;
1245   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1246   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1247   PetscInt       *cmap,*idx_p;
1248 
1249   PetscFunctionBegin;
1250   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1251   mat->getrowactive = PETSC_TRUE;
1252 
1253   if (!mat->rowvalues && (idx || v)) {
1254     /*
1255         allocate enough space to hold information from the longest row.
1256     */
1257     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1258     PetscInt     max = 1,tmp;
1259     for (i=0; i<matin->rmap.n; i++) {
1260       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1261       if (max < tmp) { max = tmp; }
1262     }
1263     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1264     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1265   }
1266 
1267   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1268   lrow = row - rstart;
1269 
1270   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1271   if (!v)   {pvA = 0; pvB = 0;}
1272   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1273   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1274   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1275   nztot = nzA + nzB;
1276 
1277   cmap  = mat->garray;
1278   if (v  || idx) {
1279     if (nztot) {
1280       /* Sort by increasing column numbers, assuming A and B already sorted */
1281       PetscInt imark = -1;
1282       if (v) {
1283         *v = v_p = mat->rowvalues;
1284         for (i=0; i<nzB; i++) {
1285           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1286           else break;
1287         }
1288         imark = i;
1289         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1290         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1291       }
1292       if (idx) {
1293         *idx = idx_p = mat->rowindices;
1294         if (imark > -1) {
1295           for (i=0; i<imark; i++) {
1296             idx_p[i] = cmap[cworkB[i]];
1297           }
1298         } else {
1299           for (i=0; i<nzB; i++) {
1300             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1301             else break;
1302           }
1303           imark = i;
1304         }
1305         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1306         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1307       }
1308     } else {
1309       if (idx) *idx = 0;
1310       if (v)   *v   = 0;
1311     }
1312   }
1313   *nz = nztot;
1314   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1315   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
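
/*
   Illustrative usage (sketch only; only locally owned rows may be requested, and
   the pointer const-ness should follow this version's petscmat.h prototype):

     PetscInt    ncols,*cols;
     PetscScalar *vals;
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     // ... inspect ncols, cols[], vals[] ...
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/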
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1321 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1322 {
1323   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1324 
1325   PetscFunctionBegin;
1326   if (!aij->getrowactive) {
1327     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1328   }
1329   aij->getrowactive = PETSC_FALSE;
1330   PetscFunctionReturn(0);
1331 }
1332 
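/*
   Illustrative usage sketch (not from the original file): MatGetRow()/MatRestoreRow()
   may only be called for rows owned by the calling process, as enforced above.
   The matrix A and the per-row processing step are assumptions for illustration.

     PetscInt    row,rstart,rend,ncols,*cols;
     PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       (use ncols, cols[], vals[] for this row here)
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/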
1333 #undef __FUNCT__
1334 #define __FUNCT__ "MatNorm_MPIAIJ"
1335 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1336 {
1337   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1338   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1339   PetscErrorCode ierr;
1340   PetscInt       i,j,cstart = mat->cmap.rstart;
1341   PetscReal      sum = 0.0;
1342   MatScalar      *v;
1343 
1344   PetscFunctionBegin;
1345   if (aij->size == 1) {
1346     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1347   } else {
1348     if (type == NORM_FROBENIUS) {
1349       v = amat->a;
1350       for (i=0; i<amat->nz; i++) {
1351 #if defined(PETSC_USE_COMPLEX)
1352         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1353 #else
1354         sum += (*v)*(*v); v++;
1355 #endif
1356       }
1357       v = bmat->a;
1358       for (i=0; i<bmat->nz; i++) {
1359 #if defined(PETSC_USE_COMPLEX)
1360         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1361 #else
1362         sum += (*v)*(*v); v++;
1363 #endif
1364       }
1365       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1366       *norm = sqrt(*norm);
1367     } else if (type == NORM_1) { /* max column norm */
1368       PetscReal *tmp,*tmp2;
1369       PetscInt  *jj,*garray = aij->garray;
1370       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1371       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1372       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1373       *norm = 0.0;
1374       v = amat->a; jj = amat->j;
1375       for (j=0; j<amat->nz; j++) {
1376         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1377       }
1378       v = bmat->a; jj = bmat->j;
1379       for (j=0; j<bmat->nz; j++) {
1380         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1381       }
1382       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
1383       for (j=0; j<mat->cmap.N; j++) {
1384         if (tmp2[j] > *norm) *norm = tmp2[j];
1385       }
1386       ierr = PetscFree(tmp);CHKERRQ(ierr);
1387       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1388     } else if (type == NORM_INFINITY) { /* max row norm */
1389       PetscReal ntemp = 0.0;
1390       for (j=0; j<aij->A->rmap.n; j++) {
1391         v = amat->a + amat->i[j];
1392         sum = 0.0;
1393         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1394           sum += PetscAbsScalar(*v); v++;
1395         }
1396         v = bmat->a + bmat->i[j];
1397         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1398           sum += PetscAbsScalar(*v); v++;
1399         }
1400         if (sum > ntemp) ntemp = sum;
1401       }
1402       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
1403     } else {
1404       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1405     }
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
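/*
   Illustrative usage sketch (not from the original file): the routine above supports
   NORM_FROBENIUS, NORM_1 (max column sum of absolute values) and NORM_INFINITY
   (max row sum of absolute values); the two norm is not supported for MPIAIJ.
   The matrix name A is an assumption for illustration.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/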
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatTranspose_MPIAIJ"
1412 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1413 {
1414   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1415   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1416   PetscErrorCode ierr;
1417   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1418   PetscInt       cstart=A->cmap.rstart,ncol;
1419   Mat            B;
1420   MatScalar      *array;
1421 
1422   PetscFunctionBegin;
1423   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1424 
1425   /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1426   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1427   ai = Aloc->i; aj = Aloc->j;
1428   bi = Bloc->i; bj = Bloc->j;
1429   ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1430   cols = d_nnz + na + 1; /* work space to be used by B part */
1431   ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1432   for (i=0; i<ai[ma]; i++){
1433     d_nnz[aj[i]] ++;
1434     aj[i] += cstart; /* global col index to be used by MatSetValues() */
1435   }
1436 
1437   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1438   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1439   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1440   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1441 
1442   /* copy over the A part */
1443   array = Aloc->a;
1444   row = A->rmap.rstart;
1445   for (i=0; i<ma; i++) {
1446     ncol = ai[i+1]-ai[i];
1447     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1448     row++; array += ncol; aj += ncol;
1449   }
1450   aj = Aloc->j;
1451   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col index */
1452 
1453   /* copy over the B part */
1454   array = Bloc->a;
1455   row = A->rmap.rstart;
1456   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1457   for (i=0; i<mb; i++) {
1458     ncol = bi[i+1]-bi[i];
1459     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1460     row++; array += ncol; cols += ncol;
1461   }
1462   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1463   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1464   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1465   if (matout) {
1466     *matout = B;
1467   } else {
1468     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1469   }
1470   PetscFunctionReturn(0);
1471 }
1472 
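/*
   Illustrative usage sketch (not from the original file): with this interface a null
   second argument requests an in-place transpose, which the check above restricts to
   square matrices; otherwise a new matrix is created.  A and At are assumptions.

     Mat At;
     ierr = MatTranspose(A,&At);CHKERRQ(ierr);          (out-of-place: creates At = A^T)
     ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);   (in-place: replaces A by A^T, square only)
     ierr = MatDestroy(At);CHKERRQ(ierr);
*/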
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1475 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1476 {
1477   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1478   Mat            a = aij->A,b = aij->B;
1479   PetscErrorCode ierr;
1480   PetscInt       s1,s2,s3;
1481 
1482   PetscFunctionBegin;
1483   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1484   if (rr) {
1485     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1486     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1487     /* Overlap communication with computation. */
1488     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1489   }
1490   if (ll) {
1491     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1492     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1493     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1494   }
1495   /* scale  the diagonal block */
1496   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1497 
1498   if (rr) {
1499     /* Do a scatter end and then right scale the off-diagonal block */
1500     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1501     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1502   }
1503 
1504   PetscFunctionReturn(0);
1505 }
1506 
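/*
   Illustrative usage sketch (not from the original file): MatDiagonalScale() computes
   A <- diag(ll) A diag(rr); either vector may be PETSC_NULL to skip that side.  The
   layout of ll must conform to the rows of A and that of rr to its columns, as checked
   above.  A, ll and rr are assumptions for illustration.

     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);           (scale rows by ll, columns by rr)
     ierr = MatDiagonalScale(A,PETSC_NULL,rr);CHKERRQ(ierr);   (right scaling only)
*/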
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1509 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1510 {
1511   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1512   PetscErrorCode ierr;
1513 
1514   PetscFunctionBegin;
1515   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1516   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1521 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1522 {
1523   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1524   PetscErrorCode ierr;
1525 
1526   PetscFunctionBegin;
1527   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatEqual_MPIAIJ"
1533 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1534 {
1535   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1536   Mat            a,b,c,d;
1537   PetscTruth     flg;
1538   PetscErrorCode ierr;
1539 
1540   PetscFunctionBegin;
1541   a = matA->A; b = matA->B;
1542   c = matB->A; d = matB->B;
1543 
1544   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1545   if (flg) {
1546     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1547   }
1548   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1549   PetscFunctionReturn(0);
1550 }
1551 
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MatCopy_MPIAIJ"
1554 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1555 {
1556   PetscErrorCode ierr;
1557   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1558   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1559 
1560   PetscFunctionBegin;
1561   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1562   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1563     /* Because of the column compression in the off-processor part of the matrix a->B,
1564        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
1565        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
1566        could be provided by first uncompressing the a->B matrices and then copying the
1567        submatrices */
1568     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1569   } else {
1570     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1571     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1572   }
1573   PetscFunctionReturn(0);
1574 }
1575 
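/*
   Illustrative usage sketch (not from the original file): SAME_NONZERO_PATTERN enables
   the fast per-block copy above; any other structure flag falls back to MatCopy_Basic().
   A and B are assumptions and must have identical parallel layouts.

     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatCopy(A,B,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/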
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1578 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1579 {
1580   PetscErrorCode ierr;
1581 
1582   PetscFunctionBegin;
1583   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 #include "petscblaslapack.h"
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatAXPY_MPIAIJ"
1590 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1591 {
1592   PetscErrorCode ierr;
1593   PetscInt       i;
1594   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1595   PetscBLASInt   bnz,one=1;
1596   Mat_SeqAIJ     *x,*y;
1597 
1598   PetscFunctionBegin;
1599   if (str == SAME_NONZERO_PATTERN) {
1600     PetscScalar alpha = a;
1601     x = (Mat_SeqAIJ *)xx->A->data;
1602     y = (Mat_SeqAIJ *)yy->A->data;
1603     bnz = PetscBLASIntCast(x->nz);
1604     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1605     x = (Mat_SeqAIJ *)xx->B->data;
1606     y = (Mat_SeqAIJ *)yy->B->data;
1607     bnz = PetscBLASIntCast(x->nz);
1608     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1609   } else if (str == SUBSET_NONZERO_PATTERN) {
1610     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1611 
1612     x = (Mat_SeqAIJ *)xx->B->data;
1613     y = (Mat_SeqAIJ *)yy->B->data;
1614     if (y->xtoy && y->XtoY != xx->B) {
1615       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1616       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1617     }
1618     if (!y->xtoy) { /* get xtoy */
1619       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1620       y->XtoY = xx->B;
1621       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1622     }
1623     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1624   } else {
1625     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1626   }
1627   PetscFunctionReturn(0);
1628 }
1629 
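/*
   Illustrative usage sketch (not from the original file): MatAXPY() computes
   Y <- Y + a*X.  The MatStructure flag selects the path above: SAME_NONZERO_PATTERN
   uses a direct BLAS axpy on the stored values, SUBSET_NONZERO_PATTERN maps X's
   entries into Y via the xtoy table, and anything else falls back to MatAXPY_Basic().
   X, Y and alpha are assumptions for illustration.

     PetscScalar alpha = 2.0;
     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/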
1630 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatConjugate_MPIAIJ"
1634 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1635 {
1636 #if defined(PETSC_USE_COMPLEX)
1637   PetscErrorCode ierr;
1638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1639 
1640   PetscFunctionBegin;
1641   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1642   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1643 #else
1644   PetscFunctionBegin;
1645 #endif
1646   PetscFunctionReturn(0);
1647 }
1648 
1649 #undef __FUNCT__
1650 #define __FUNCT__ "MatRealPart_MPIAIJ"
1651 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1652 {
1653   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1654   PetscErrorCode ierr;
1655 
1656   PetscFunctionBegin;
1657   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1658   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1659   PetscFunctionReturn(0);
1660 }
1661 
1662 #undef __FUNCT__
1663 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1664 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1665 {
1666   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1667   PetscErrorCode ierr;
1668 
1669   PetscFunctionBegin;
1670   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1671   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #ifdef PETSC_HAVE_PBGL
1676 
1677 #include <boost/parallel/mpi/bsp_process_group.hpp>
1678 #include <boost/graph/distributed/ilu_default_graph.hpp>
1679 #include <boost/graph/distributed/ilu_0_block.hpp>
1680 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1681 #include <boost/graph/distributed/petsc/interface.hpp>
1682 #include <boost/multi_array.hpp>
1683 #include <boost/parallel/distributed_property_map.hpp>
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1687 /*
1688   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1689 */
1690 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1691 {
1692   namespace petsc = boost::distributed::petsc;
1693 
1694   namespace graph_dist = boost::graph::distributed;
1695   using boost::graph::distributed::ilu_default::process_group_type;
1696   using boost::graph::ilu_permuted;
1697 
1698   PetscTruth      row_identity, col_identity;
1699   PetscContainer  c;
1700   PetscInt        m, n, M, N;
1701   PetscErrorCode  ierr;
1702 
1703   PetscFunctionBegin;
1704   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1705   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1706   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1707   if (!row_identity || !col_identity) {
1708     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1709   }
1710 
1711   process_group_type pg;
1712   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1713   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1714   lgraph_type&   level_graph = *lgraph_p;
1715   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1716 
1717   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1718   ilu_permuted(level_graph);
1719 
1720   /* put together the new matrix */
1721   ierr = MatCreate(((PetscObject)A)->comm, fact);CHKERRQ(ierr);
1722   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1723   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1724   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1725   ierr = MatSetType(*fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
1726   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1727   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1728   (*fact)->factor = FACTOR_LU;
1729 
1730   ierr = PetscContainerCreate(((PetscObject)A)->comm, &c);CHKERRQ(ierr);
1731   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1732   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1738 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1739 {
1740   PetscFunctionBegin;
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatSolve_MPIAIJ"
1746 /*
1747   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1748 */
1749 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1750 {
1751   namespace graph_dist = boost::graph::distributed;
1752 
1753   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1754   lgraph_type*   lgraph_p;
1755   PetscContainer c;
1756   PetscErrorCode ierr;
1757 
1758   PetscFunctionBegin;
1759   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1760   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1761   ierr = VecCopy(b, x); CHKERRQ(ierr);
1762 
1763   PetscScalar* array_x;
1764   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1765   PetscInt sx;
1766   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1767 
1768   PetscScalar* array_b;
1769   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1770   PetscInt sb;
1771   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1772 
1773   lgraph_type&   level_graph = *lgraph_p;
1774   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1775 
1776   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1777   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1778                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1779 
1780   typedef boost::iterator_property_map<array_ref_type::iterator,
1781                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1782   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1783                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1784 
1785   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1786 
1787   PetscFunctionReturn(0);
1788 }
1789 #endif
1790 
1791 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1792   PetscInt       nzlocal,nsends,nrecvs;
1793   PetscMPIInt    *send_rank;
1794   PetscInt       *sbuf_nz,*sbuf_j,**rbuf_j;
1795   PetscScalar    *sbuf_a,**rbuf_a;
1796   PetscErrorCode (*MatDestroy)(Mat);
1797 } Mat_Redundant;
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1801 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1802 {
1803   PetscErrorCode       ierr;
1804   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1805   PetscInt             i;
1806 
1807   PetscFunctionBegin;
1808   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1809   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1810   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1811   for (i=0; i<redund->nrecvs; i++){
1812     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1813     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1814   }
1815   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1816   ierr = PetscFree(redund);CHKERRQ(ierr);
1817   PetscFunctionReturn(0);
1818 }
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatDestroy_MatRedundant"
1822 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1823 {
1824   PetscErrorCode  ierr;
1825   PetscContainer  container;
1826   Mat_Redundant   *redund=PETSC_NULL;
1827 
1828   PetscFunctionBegin;
1829   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1830   if (container) {
1831     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1832   } else {
1833     SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1834   }
1835   A->ops->destroy = redund->MatDestroy;
1836   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1837   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1838   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1844 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1845 {
1846   PetscMPIInt    rank,size;
1847   MPI_Comm       comm=((PetscObject)mat)->comm;
1848   PetscErrorCode ierr;
1849   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1850   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1851   PetscInt       *rowrange=mat->rmap.range;
1852   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1853   Mat            A=aij->A,B=aij->B,C=*matredundant;
1854   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1855   PetscScalar    *sbuf_a;
1856   PetscInt       nzlocal=a->nz+b->nz;
1857   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1858   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1859   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1860   MatScalar      *aworkA,*aworkB;
1861   PetscScalar    *vals;
1862   PetscMPIInt    tag1,tag2,tag3,imdex;
1863   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1864                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1865   MPI_Status     recv_status,*send_status;
1866   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1867   PetscInt       **rbuf_j=PETSC_NULL;
1868   PetscScalar    **rbuf_a=PETSC_NULL;
1869   Mat_Redundant  *redund=PETSC_NULL;
1870   PetscContainer container;
1871 
1872   PetscFunctionBegin;
1873   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1874   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1875 
1876   if (reuse == MAT_REUSE_MATRIX) {
1877     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1878     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1879     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1880     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1881     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1882     if (container) {
1883       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1884     } else {
1885       SETERRQ(PETSC_ERR_PLIB,"Container does not exist");
1886     }
1887     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1888 
1889     nsends    = redund->nsends;
1890     nrecvs    = redund->nrecvs;
1891     send_rank = redund->send_rank; recv_rank = send_rank + size;
1892     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1893     sbuf_j    = redund->sbuf_j;
1894     sbuf_a    = redund->sbuf_a;
1895     rbuf_j    = redund->rbuf_j;
1896     rbuf_a    = redund->rbuf_a;
1897   }
1898 
1899   if (reuse == MAT_INITIAL_MATRIX){
1900     PetscMPIInt  subrank,subsize;
1901     PetscInt     nleftover,np_subcomm;
1902     /* get the destination processors' id send_rank, nsends and nrecvs */
1903     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1904     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1905     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);CHKERRQ(ierr);
1906     recv_rank = send_rank + size;
1907     np_subcomm = size/nsubcomm;
1908     nleftover  = size - nsubcomm*np_subcomm;
1909     nsends = 0; nrecvs = 0;
1910     for (i=0; i<size; i++){ /* i=rank*/
1911       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1912         send_rank[nsends] = i; nsends++;
1913         recv_rank[nrecvs++] = i;
1914       }
1915     }
1916     if (rank >= size - nleftover){/* this proc is a leftover processor */
1917       i = size-nleftover-1;
1918       j = 0;
1919       while (j < nsubcomm - nleftover){
1920         send_rank[nsends++] = i;
1921         i--; j++;
1922       }
1923     }
1924 
1925     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1926       for (i=0; i<nleftover; i++){
1927         recv_rank[nrecvs++] = size-nleftover+i;
1928       }
1929     }
1930 
1931     /* allocate sbuf_j, sbuf_a */
1932     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1933     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1934     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1935   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1936 
1937   /* copy mat's local entries into the buffers */
1938   if (reuse == MAT_INITIAL_MATRIX){
1939     rownz_max = 0;
1940     rptr = sbuf_j;
1941     cols = sbuf_j + rend-rstart + 1;
1942     vals = sbuf_a;
1943     rptr[0] = 0;
1944     for (i=0; i<rend-rstart; i++){
1945       row = i + rstart;
1946       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1947       ncols  = nzA + nzB;
1948       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1949       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1950       /* load the column indices for this row into cols */
1951       lwrite = 0;
1952       for (l=0; l<nzB; l++) {
1953         if ((ctmp = bmap[cworkB[l]]) < cstart){
1954           vals[lwrite]   = aworkB[l];
1955           cols[lwrite++] = ctmp;
1956         }
1957       }
1958       for (l=0; l<nzA; l++){
1959         vals[lwrite]   = aworkA[l];
1960         cols[lwrite++] = cstart + cworkA[l];
1961       }
1962       for (l=0; l<nzB; l++) {
1963         if ((ctmp = bmap[cworkB[l]]) >= cend){
1964           vals[lwrite]   = aworkB[l];
1965           cols[lwrite++] = ctmp;
1966         }
1967       }
1968       vals += ncols;
1969       cols += ncols;
1970       rptr[i+1] = rptr[i] + ncols;
1971       if (rownz_max < ncols) rownz_max = ncols;
1972     }
1973     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
1974   } else { /* only copy matrix values into sbuf_a */
1975     rptr = sbuf_j;
1976     vals = sbuf_a;
1977     rptr[0] = 0;
1978     for (i=0; i<rend-rstart; i++){
1979       row = i + rstart;
1980       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1981       ncols  = nzA + nzB;
1982       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1983       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1984       lwrite = 0;
1985       for (l=0; l<nzB; l++) {
1986         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
1987       }
1988       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
1989       for (l=0; l<nzB; l++) {
1990         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
1991       }
1992       vals += ncols;
1993       rptr[i+1] = rptr[i] + ncols;
1994     }
1995   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
1996 
1997   /* send nzlocal to others, and recv other's nzlocal */
1998   /*--------------------------------------------------*/
1999   if (reuse == MAT_INITIAL_MATRIX){
2000     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2001     s_waits2 = s_waits3 + nsends;
2002     s_waits1 = s_waits2 + nsends;
2003     r_waits1 = s_waits1 + nsends;
2004     r_waits2 = r_waits1 + nrecvs;
2005     r_waits3 = r_waits2 + nrecvs;
2006   } else {
2007     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2008     r_waits3 = s_waits3 + nsends;
2009   }
2010 
2011   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2012   if (reuse == MAT_INITIAL_MATRIX){
2013     /* get new tags to keep the communication clean */
2014     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2015     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2016     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2017     rbuf_nz = sbuf_nz + nsends;
2018 
2019     /* post receives of other's nzlocal */
2020     for (i=0; i<nrecvs; i++){
2021       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2022     }
2023     /* send nzlocal to others */
2024     for (i=0; i<nsends; i++){
2025       sbuf_nz[i] = nzlocal;
2026       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2027     }
2028     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2029     count = nrecvs;
2030     while (count) {
2031       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2032       recv_rank[imdex] = recv_status.MPI_SOURCE;
2033       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2034       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2035 
2036       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2037       rbuf_nz[imdex] += i + 2;
2038       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2039       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2040       count--;
2041     }
2042     /* wait on sends of nzlocal */
2043     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2044     /* send mat->i,j to others, and recv from other's */
2045     /*------------------------------------------------*/
2046     for (i=0; i<nsends; i++){
2047       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2048       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2049     }
2050     /* wait on receives of mat->i,j */
2051     /*------------------------------*/
2052     count = nrecvs;
2053     while (count) {
2054       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2055       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2056       count--;
2057     }
2058     /* wait on sends of mat->i,j */
2059     /*---------------------------*/
2060     if (nsends) {
2061       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2062     }
2063   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2064 
2065   /* post receives, send and receive mat->a */
2066   /*----------------------------------------*/
2067   for (imdex=0; imdex<nrecvs; imdex++) {
2068     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2069   }
2070   for (i=0; i<nsends; i++){
2071     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2072   }
2073   count = nrecvs;
2074   while (count) {
2075     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2076     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2077     count--;
2078   }
2079   if (nsends) {
2080     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2081   }
2082 
2083   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2084 
2085   /* create redundant matrix */
2086   /*-------------------------*/
2087   if (reuse == MAT_INITIAL_MATRIX){
2088     /* compute rownz_max for preallocation */
2089     for (imdex=0; imdex<nrecvs; imdex++){
2090       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2091       rptr = rbuf_j[imdex];
2092       for (i=0; i<j; i++){
2093         ncols = rptr[i+1] - rptr[i];
2094         if (rownz_max < ncols) rownz_max = ncols;
2095       }
2096     }
2097 
2098     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2099     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2100     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2101     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2102     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2103   } else {
2104     C = *matredundant;
2105   }
2106 
2107   /* insert local matrix entries */
2108   rptr = sbuf_j;
2109   cols = sbuf_j + rend-rstart + 1;
2110   vals = sbuf_a;
2111   for (i=0; i<rend-rstart; i++){
2112     row   = i + rstart;
2113     ncols = rptr[i+1] - rptr[i];
2114     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2115     vals += ncols;
2116     cols += ncols;
2117   }
2118   /* insert received matrix entries */
2119   for (imdex=0; imdex<nrecvs; imdex++){
2120     rstart = rowrange[recv_rank[imdex]];
2121     rend   = rowrange[recv_rank[imdex]+1];
2122     rptr = rbuf_j[imdex];
2123     cols = rbuf_j[imdex] + rend-rstart + 1;
2124     vals = rbuf_a[imdex];
2125     for (i=0; i<rend-rstart; i++){
2126       row   = i + rstart;
2127       ncols = rptr[i+1] - rptr[i];
2128       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2129       vals += ncols;
2130       cols += ncols;
2131     }
2132   }
2133   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2134   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2135   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2136   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2137   if (reuse == MAT_INITIAL_MATRIX){
2138     PetscContainer container;
2139     *matredundant = C;
2140     /* create a supporting struct and attach it to C for reuse */
2141     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2142     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2143     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2144     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2145     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2146 
2147     redund->nzlocal = nzlocal;
2148     redund->nsends  = nsends;
2149     redund->nrecvs  = nrecvs;
2150     redund->send_rank = send_rank;
2151     redund->sbuf_nz = sbuf_nz;
2152     redund->sbuf_j  = sbuf_j;
2153     redund->sbuf_a  = sbuf_a;
2154     redund->rbuf_j  = rbuf_j;
2155     redund->rbuf_a  = rbuf_a;
2156 
2157     redund->MatDestroy = C->ops->destroy;
2158     C->ops->destroy    = MatDestroy_MatRedundant;
2159   }
2160   PetscFunctionReturn(0);
2161 }
2162 
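/*
   Illustrative usage sketch (not from the original file): each subcommunicator ends up
   with a complete (redundant) copy of the parallel matrix; a second call with
   MAT_REUSE_MATRIX reuses the communication pattern cached in the Mat_Redundant
   container created above.  mat, nsubcomm, subcomm and mlocal are assumptions.

     Mat Cred;
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal,MAT_INITIAL_MATRIX,&Cred);CHKERRQ(ierr);
     (change the numerical values of mat, keeping its nonzero pattern)
     ierr = MatGetRedundantMatrix(mat,nsubcomm,subcomm,mlocal,MAT_REUSE_MATRIX,&Cred);CHKERRQ(ierr);
*/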
2163 #undef __FUNCT__
2164 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2165 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2166 {
2167   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
2168   PetscInt       n      = A->rmap.n;
2169   PetscInt       cstart = A->cmap.rstart;
2170   PetscInt      *cmap   = mat->garray;
2171   PetscInt      *diagIdx, *offdiagIdx;
2172   Vec            diagV, offdiagV;
2173   PetscScalar   *a, *diagA, *offdiagA;
2174   PetscInt       r;
2175   PetscErrorCode ierr;
2176 
2177   PetscFunctionBegin;
2178   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
2179   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &diagV);CHKERRQ(ierr);
2180   ierr = VecCreateSeq(((PetscObject)A)->comm, n, &offdiagV);CHKERRQ(ierr);
2181   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2182   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2183   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2184   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2185   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2186   for(r = 0; r < n; ++r) {
2187     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2188       a[r]   = diagA[r];
2189       idx[r] = cstart + diagIdx[r];
2190     } else {
2191       a[r]   = offdiagA[r];
2192       idx[r] = cmap[offdiagIdx[r]];
2193     }
2194   }
2195   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2196   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2197   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2198   ierr = VecDestroy(diagV);CHKERRQ(ierr);
2199   ierr = VecDestroy(offdiagV);CHKERRQ(ierr);
2200   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIAIJ"
2206 PetscErrorCode MatGetSeqNonzerostructure_MPIAIJ(Mat mat,Mat *newmat[])
2207 {
2208   PetscErrorCode ierr;
2209 
2210   PetscFunctionBegin;
2211   ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 /* -------------------------------------------------------------------*/
2216 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2217        MatGetRow_MPIAIJ,
2218        MatRestoreRow_MPIAIJ,
2219        MatMult_MPIAIJ,
2220 /* 4*/ MatMultAdd_MPIAIJ,
2221        MatMultTranspose_MPIAIJ,
2222        MatMultTransposeAdd_MPIAIJ,
2223 #ifdef PETSC_HAVE_PBGL
2224        MatSolve_MPIAIJ,
2225 #else
2226        0,
2227 #endif
2228        0,
2229        0,
2230 /*10*/ 0,
2231        0,
2232        0,
2233        MatRelax_MPIAIJ,
2234        MatTranspose_MPIAIJ,
2235 /*15*/ MatGetInfo_MPIAIJ,
2236        MatEqual_MPIAIJ,
2237        MatGetDiagonal_MPIAIJ,
2238        MatDiagonalScale_MPIAIJ,
2239        MatNorm_MPIAIJ,
2240 /*20*/ MatAssemblyBegin_MPIAIJ,
2241        MatAssemblyEnd_MPIAIJ,
2242        0,
2243        MatSetOption_MPIAIJ,
2244        MatZeroEntries_MPIAIJ,
2245 /*25*/ MatZeroRows_MPIAIJ,
2246        0,
2247 #ifdef PETSC_HAVE_PBGL
2248        MatLUFactorNumeric_MPIAIJ,
2249 #else
2250        0,
2251 #endif
2252        0,
2253        0,
2254 /*30*/ MatSetUpPreallocation_MPIAIJ,
2255 #ifdef PETSC_HAVE_PBGL
2256        MatILUFactorSymbolic_MPIAIJ,
2257 #else
2258        0,
2259 #endif
2260        0,
2261        0,
2262        0,
2263 /*35*/ MatDuplicate_MPIAIJ,
2264        0,
2265        0,
2266        0,
2267        0,
2268 /*40*/ MatAXPY_MPIAIJ,
2269        MatGetSubMatrices_MPIAIJ,
2270        MatIncreaseOverlap_MPIAIJ,
2271        MatGetValues_MPIAIJ,
2272        MatCopy_MPIAIJ,
2273 /*45*/ 0,
2274        MatScale_MPIAIJ,
2275        0,
2276        0,
2277        0,
2278 /*50*/ MatSetBlockSize_MPIAIJ,
2279        0,
2280        0,
2281        0,
2282        0,
2283 /*55*/ MatFDColoringCreate_MPIAIJ,
2284        0,
2285        MatSetUnfactored_MPIAIJ,
2286        MatPermute_MPIAIJ,
2287        0,
2288 /*60*/ MatGetSubMatrix_MPIAIJ,
2289        MatDestroy_MPIAIJ,
2290        MatView_MPIAIJ,
2291        0,
2292        0,
2293 /*65*/ 0,
2294        0,
2295        0,
2296        0,
2297        0,
2298 /*70*/ 0,
2299        0,
2300        MatSetColoring_MPIAIJ,
2301 #if defined(PETSC_HAVE_ADIC)
2302        MatSetValuesAdic_MPIAIJ,
2303 #else
2304        0,
2305 #endif
2306        MatSetValuesAdifor_MPIAIJ,
2307 /*75*/ 0,
2308        0,
2309        0,
2310        0,
2311        0,
2312 /*80*/ 0,
2313        0,
2314        0,
2315        0,
2316 /*84*/ MatLoad_MPIAIJ,
2317        0,
2318        0,
2319        0,
2320        0,
2321        0,
2322 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2323        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2324        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2325        MatPtAP_Basic,
2326        MatPtAPSymbolic_MPIAIJ,
2327 /*95*/ MatPtAPNumeric_MPIAIJ,
2328        0,
2329        0,
2330        0,
2331        0,
2332 /*100*/0,
2333        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2334        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2335        MatConjugate_MPIAIJ,
2336        0,
2337 /*105*/MatSetValuesRow_MPIAIJ,
2338        MatRealPart_MPIAIJ,
2339        MatImaginaryPart_MPIAIJ,
2340        0,
2341        0,
2342 /*110*/0,
2343        MatGetRedundantMatrix_MPIAIJ,
2344        MatGetRowMin_MPIAIJ,
2345        0,
2346        0,
2347 /*115*/MatGetSeqNonzerostructure_MPIAIJ};
2348 
2349 /* ----------------------------------------------------------------------------------------*/
2350 
2351 EXTERN_C_BEGIN
2352 #undef __FUNCT__
2353 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2354 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2355 {
2356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2357   PetscErrorCode ierr;
2358 
2359   PetscFunctionBegin;
2360   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2361   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2362   PetscFunctionReturn(0);
2363 }
2364 EXTERN_C_END
2365 
2366 EXTERN_C_BEGIN
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2369 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2370 {
2371   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2376   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 EXTERN_C_END
2380 
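/*
   Illustrative usage sketch (not from the original file): MatStoreValues() and
   MatRetrieveValues() save and restore the numerical values of the two sequential
   blocks; the nonzero pattern must not change between the calls (typically new
   nonzero locations are disallowed via MatSetOption()).  A is an assumption.

     ierr = MatStoreValues(A);CHKERRQ(ierr);
     (modify the values of A, e.g. add a diagonal shift, factor, solve)
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/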
2381 #include "petscpc.h"
2382 EXTERN_C_BEGIN
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2385 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2386 {
2387   Mat_MPIAIJ     *b;
2388   PetscErrorCode ierr;
2389   PetscInt       i;
2390 
2391   PetscFunctionBegin;
2392   B->preallocated = PETSC_TRUE;
2393   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2394   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2395   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2396   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2397 
2398   B->rmap.bs = B->cmap.bs = 1;
2399   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2400   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2401   if (d_nnz) {
2402     for (i=0; i<B->rmap.n; i++) {
2403       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2404     }
2405   }
2406   if (o_nnz) {
2407     for (i=0; i<B->rmap.n; i++) {
2408       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2409     }
2410   }
2411   b = (Mat_MPIAIJ*)B->data;
2412 
2413   /* Explicitly create 2 MATSEQAIJ matrices. */
2414   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2415   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2416   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2417   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2418   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2419   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2420   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2421   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2422 
2423   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2424   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2425 
2426   PetscFunctionReturn(0);
2427 }
2428 EXTERN_C_END
2429 
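/*
   Illustrative usage sketch (not from the original file): for an MPIAIJ matrix the
   preallocation is split between the "diagonal" block (columns owned by this process)
   and the "off-diagonal" block (all other columns), each stored as a SeqAIJ matrix as
   created above.  B, comm, m, n, M, N, d_nnz and o_nnz are assumptions.

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(B,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);   (uniform estimate)
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);             (exact per-row counts)
*/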
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2432 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2433 {
2434   Mat            mat;
2435   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2436   PetscErrorCode ierr;
2437 
2438   PetscFunctionBegin;
2439   *newmat       = 0;
2440   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2441   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2442   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2443   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2444   a    = (Mat_MPIAIJ*)mat->data;
2445 
2446   mat->factor       = matin->factor;
2447   mat->rmap.bs      = matin->rmap.bs;
2448   mat->assembled    = PETSC_TRUE;
2449   mat->insertmode   = NOT_SET_VALUES;
2450   mat->preallocated = PETSC_TRUE;
2451 
2452   a->size           = oldmat->size;
2453   a->rank           = oldmat->rank;
2454   a->donotstash     = oldmat->donotstash;
2455   a->roworiented    = oldmat->roworiented;
2456   a->rowindices     = 0;
2457   a->rowvalues      = 0;
2458   a->getrowactive   = PETSC_FALSE;
2459 
2460   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2461   ierr = PetscMapCopy(((PetscObject)mat)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2462 
2463   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2464   if (oldmat->colmap) {
2465 #if defined (PETSC_USE_CTABLE)
2466     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2467 #else
2468     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2469     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2470     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2471 #endif
2472   } else a->colmap = 0;
2473   if (oldmat->garray) {
2474     PetscInt len;
2475     len  = oldmat->B->cmap.n;
2476     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2477     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2478     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2479   } else a->garray = 0;
2480 
2481   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2482   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2483   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2484   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2485   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2486   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2487   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2488   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2489   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2490   *newmat = mat;
2491   PetscFunctionReturn(0);
2492 }
2493 
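/*
   Illustrative usage sketch (not from the original file): MatDuplicate() creates a new
   matrix with the same parallel layout as above; MAT_COPY_VALUES also copies the
   numerical entries, while MAT_DO_NOT_COPY_VALUES leaves them zero.  A and B are
   assumptions for illustration.

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
*/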
2494 #include "petscsys.h"
2495 
2496 #undef __FUNCT__
2497 #define __FUNCT__ "MatLoad_MPIAIJ"
2498 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2499 {
2500   Mat            A;
2501   PetscScalar    *vals,*svals;
2502   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2503   MPI_Status     status;
2504   PetscErrorCode ierr;
2505   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2506   PetscInt       i,nz,j,rstart,rend,mmax;
2507   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2508   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2509   PetscInt       cend,cstart,n,*rowners;
2510   int            fd;
2511 
2512   PetscFunctionBegin;
2513   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2514   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2515   if (!rank) {
2516     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2517     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2518     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2519   }
2520 
2521   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2522   M = header[1]; N = header[2];
2523   /* determine ownership of all rows */
2524   m    = M/size + ((M % size) > rank);
2525   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2526   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2527 
2528   /* First process needs enough room for process with most rows */
2529   if (!rank) {
2530     mmax       = rowners[1];
2531     for (i=2; i<size; i++) {
2532       mmax = PetscMax(mmax,rowners[i]);
2533     }
2534   } else mmax = m;
2535 
2536   rowners[0] = 0;
2537   for (i=2; i<=size; i++) {
2538     rowners[i] += rowners[i-1];
2539   }
2540   rstart = rowners[rank];
2541   rend   = rowners[rank+1];
2542 
2543   /* distribute row lengths to all processors */
2544   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2545   if (!rank) {
2546     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2547     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2548     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2549     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2550     for (j=0; j<m; j++) {
2551       procsnz[0] += ourlens[j];
2552     }
2553     for (i=1; i<size; i++) {
2554       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2555       /* calculate the number of nonzeros on each processor */
2556       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2557         procsnz[i] += rowlengths[j];
2558       }
2559       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2560     }
2561     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2562   } else {
2563     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2564   }
2565 
2566   if (!rank) {
2567     /* determine max buffer needed and allocate it */
2568     maxnz = 0;
2569     for (i=0; i<size; i++) {
2570       maxnz = PetscMax(maxnz,procsnz[i]);
2571     }
2572     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2573 
2574     /* read in my part of the matrix column indices  */
2575     nz   = procsnz[0];
2576     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2577     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2578 
2579     /* read in everyone else's and ship off */
2580     for (i=1; i<size; i++) {
2581       nz   = procsnz[i];
2582       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2583       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2584     }
2585     ierr = PetscFree(cols);CHKERRQ(ierr);
2586   } else {
2587     /* determine buffer space needed for message */
2588     nz = 0;
2589     for (i=0; i<m; i++) {
2590       nz += ourlens[i];
2591     }
2592     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2593 
2594     /* receive message of column indices*/
2595     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2596     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2597     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2598   }
2599 
2600   /* determine column ownership if matrix is not square */
2601   if (N != M) {
2602     n      = N/size + ((N % size) > rank);
2603     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2604     cstart = cend - n;
2605   } else {
2606     cstart = rstart;
2607     cend   = rend;
2608     n      = cend - cstart;
2609   }
2610 
2611   /* loop over local rows, determining number of off diagonal entries */
2612   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2613   jj = 0;
2614   for (i=0; i<m; i++) {
2615     for (j=0; j<ourlens[i]; j++) {
2616       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2617       jj++;
2618     }
2619   }
2620 
2621   /* create our matrix */
2622   for (i=0; i<m; i++) {
2623     ourlens[i] -= offlens[i];
2624   }
2625   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2626   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2627   ierr = MatSetType(A,type);CHKERRQ(ierr);
2628   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2629 
2630   for (i=0; i<m; i++) {
2631     ourlens[i] += offlens[i];
2632   }
2633 
2634   if (!rank) {
2635     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2636 
2637     /* read in my part of the matrix numerical values  */
2638     nz   = procsnz[0];
2639     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2640 
2641     /* insert into matrix */
2642     jj      = rstart;
2643     smycols = mycols;
2644     svals   = vals;
2645     for (i=0; i<m; i++) {
2646       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2647       smycols += ourlens[i];
2648       svals   += ourlens[i];
2649       jj++;
2650     }
2651 
2652     /* read in other processors and ship out */
2653     for (i=1; i<size; i++) {
2654       nz   = procsnz[i];
2655       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2656       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2657     }
2658     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2659   } else {
2660     /* receive numeric values */
2661     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2662 
2663     /* receive message of values*/
2664     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2665     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2666     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2667 
2668     /* insert into matrix */
2669     jj      = rstart;
2670     smycols = mycols;
2671     svals   = vals;
2672     for (i=0; i<m; i++) {
2673       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2674       smycols += ourlens[i];
2675       svals   += ourlens[i];
2676       jj++;
2677     }
2678   }
2679   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2680   ierr = PetscFree(vals);CHKERRQ(ierr);
2681   ierr = PetscFree(mycols);CHKERRQ(ierr);
2682   ierr = PetscFree(rowners);CHKERRQ(ierr);
2683 
2684   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2685   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2686   *newmat = A;
2687   PetscFunctionReturn(0);
2688 }
2689 
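/*
   Illustrative usage sketch (not from the original file): in this interface MatLoad()
   takes a binary viewer and the desired matrix type; the file is read on process 0 and
   the rows are distributed as computed above.  The communicator and file name are
   assumptions for illustration.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/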
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2692 /*
2693     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
2694   process, then the final result obtained by concatenating those local matrices.
2695   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2696 */
2697 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2698 {
2699   PetscErrorCode ierr;
2700   PetscMPIInt    rank,size;
2701   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2702   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2703   Mat            *local,M,Mreuse;
2704   MatScalar      *vwork,*aa;
2705   MPI_Comm       comm = ((PetscObject)mat)->comm;
2706   Mat_SeqAIJ     *aij;
2707 
2708 
2709   PetscFunctionBegin;
2710   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2711   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2712 
2713   if (call ==  MAT_REUSE_MATRIX) {
2714     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2715     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2716     local = &Mreuse;
2717     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2718   } else {
2719     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2720     Mreuse = *local;
2721     ierr   = PetscFree(local);CHKERRQ(ierr);
2722   }
2723 
2724   /*
2725       m - number of local rows
2726       n - number of columns (same on all processors)
2727       rstart - first row in new global matrix generated
2728   */
2729   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2730   if (call == MAT_INITIAL_MATRIX) {
2731     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2732     ii  = aij->i;
2733     jj  = aij->j;
2734 
2735     /*
2736         Determine the number of non-zeros in the diagonal and off-diagonal
2737         portions of the matrix in order to do correct preallocation
2738     */
2739 
2740     /* first get start and end of "diagonal" columns */
2741     if (csize == PETSC_DECIDE) {
2742       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2743       if (mglobal == n) { /* square matrix */
2744 	nlocal = m;
2745       } else {
2746         nlocal = n/size + ((n % size) > rank);
2747       }
2748     } else {
2749       nlocal = csize;
2750     }
2751     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2752     rstart = rend - nlocal;
2753     if (rank == size - 1 && rend != n) {
2754       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2755     }
2756 
2757     /* next, compute all the lengths */
2758     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2759     olens = dlens + m;
2760     for (i=0; i<m; i++) {
2761       jend = ii[i+1] - ii[i];
2762       olen = 0;
2763       dlen = 0;
2764       for (j=0; j<jend; j++) {
2765         if (*jj < rstart || *jj >= rend) olen++;
2766         else dlen++;
2767         jj++;
2768       }
2769       olens[i] = olen;
2770       dlens[i] = dlen;
2771     }
2772     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2773     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2774     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2775     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2776     ierr = PetscFree(dlens);CHKERRQ(ierr);
2777   } else {
2778     PetscInt ml,nl;
2779 
2780     M = *newmat;
2781     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2782     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2783     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2784     /*
2785          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2786        rather than the slower MatSetValues().
2787     */
2788     M->was_assembled = PETSC_TRUE;
2789     M->assembled     = PETSC_FALSE;
2790   }
2791   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2792   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2793   ii  = aij->i;
2794   jj  = aij->j;
2795   aa  = aij->a;
2796   for (i=0; i<m; i++) {
2797     row   = rstart + i;
2798     nz    = ii[i+1] - ii[i];
2799     cwork = jj;     jj += nz;
2800     vwork = aa;     aa += nz;
2801     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2802   }
2803 
2804   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2805   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2806   *newmat = M;
2807 
2808   /* save submatrix used in processor for next request */
2809   if (call ==  MAT_INITIAL_MATRIX) {
2810     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2811     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2812   }
2813 
2814   PetscFunctionReturn(0);
2815 }
2816 
2817 EXTERN_C_BEGIN
2818 #undef __FUNCT__
2819 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2820 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2821 {
2822   PetscInt       m,cstart, cend,j,nnz,i,d;
2823   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2824   const PetscInt *JJ;
2825   PetscScalar    *values;
2826   PetscErrorCode ierr;
2827 
2828   PetscFunctionBegin;
2829   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2830 
2831   B->rmap.bs = B->cmap.bs = 1;
2832   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2833   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2834   m      = B->rmap.n;
2835   cstart = B->cmap.rstart;
2836   cend   = B->cmap.rend;
2837   rstart = B->rmap.rstart;
2838 
2839   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2840   o_nnz = d_nnz + m;
2841 
2842 #if defined(PETSC_USE_DEBUG)
2843   for (i=0; i<m; i++) {
2844     nnz     = Ii[i+1]- Ii[i];
2845     JJ      = J + Ii[i];
2846     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2847     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
2848     if (nnz && (JJ[nnz-1] >= B->cmap.N)) SETERRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap.N);
2849     for (j=1; j<nnz; j++) {
2850       if (JJ[j] <= JJ[j-1]) SETERRQ2(PETSC_ERR_ARG_WRONGSTATE,"Row %D has unsorted column index at %D location in column indices",i,j);
2851     }
2852   }
2853 #endif
2854 
2855   for (i=0; i<m; i++) {
2856     nnz     = Ii[i+1]- Ii[i];
2857     JJ      = J + Ii[i];
2858     nnz_max = PetscMax(nnz_max,nnz);
2859     for (j=0; j<nnz; j++) {
2860       if (*JJ >= cstart) break;
2861       JJ++;
2862     }
2863     d = 0;
2864     for (; j<nnz; j++) {
2865       if (*JJ++ >= cend) break;
2866       d++;
2867     }
2868     d_nnz[i] = d;
2869     o_nnz[i] = nnz - d;
2870   }
2871   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2872   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2873 
2874   if (v) values = (PetscScalar*)v;
2875   else {
2876     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2877     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2878   }
2879 
2880   for (i=0; i<m; i++) {
2881     ii   = i + rstart;
2882     nnz  = Ii[i+1]- Ii[i];
2883     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2884   }
2885   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2886   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2887 
2888   if (!v) {
2889     ierr = PetscFree(values);CHKERRQ(ierr);
2890   }
2891   PetscFunctionReturn(0);
2892 }
2893 EXTERN_C_END
2894 
2895 #undef __FUNCT__
2896 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2897 /*@
2898    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2899    (the default parallel PETSc format).
2900 
2901    Collective on MPI_Comm
2902 
2903    Input Parameters:
2904 +  B - the matrix
2905 .  i - the indices into j for the start of each local row (starts with zero)
2906 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2907 -  v - optional values in the matrix
2908 
2909    Level: developer
2910 
2911    Notes:
2912        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
2913      thus you CANNOT change the matrix entries by changing the values of v[] after you have
2914      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2915 
2916        The i and j indices are 0 based, and the i indices are offsets into the local j array.
2917 
2918        The format used for the sparse matrix input is equivalent to a
2919     row-major ordering, i.e. for the following matrix, the input data expected is
2920     as shown:
2921 
2922         1 0 0
2923         2 0 3     P0
2924        -------
2925         4 5 6     P1
2926 
2927      Process0 [P0]: rows_owned=[0,1]
2928         i =  {0,1,3}  [size = nrow+1  = 2+1]
2929         j =  {0,0,2}  [size = nz = 3]
2930         v =  {1,2,3}  [size = nz = 3]
2931 
2932      Process1 [P1]: rows_owned=[2]
2933         i =  {0,3}    [size = nrow+1  = 1+1]
2934         j =  {0,1,2}  [size = nz = 3]
2935         v =  {4,5,6}  [size = nz = 3]
2936 
2937       The column indices for each row MUST be sorted.
2938 
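       For instance, a minimal sketch of how process P0 in the example above might supply
     its data (B and ierr are assumed to be declared elsewhere; the variable names are
     illustrative only):

.vb
     PetscInt    i[] = {0,1,3};        /* row offsets for the 2 local rows */
     PetscInt    j[] = {0,0,2};        /* sorted global column indices     */
     PetscScalar v[] = {1.0,2.0,3.0};  /* values matching j[]              */

     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
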
2939 .keywords: matrix, aij, compressed row, sparse, parallel
2940 
2941 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2942           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2943 @*/
2944 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2945 {
2946   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2947 
2948   PetscFunctionBegin;
2949   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2950   if (f) {
2951     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2952   }
2953   PetscFunctionReturn(0);
2954 }
2955 
2956 #undef __FUNCT__
2957 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2958 /*@C
2959    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2960    (the default parallel PETSc format).  For good matrix assembly performance
2961    the user should preallocate the matrix storage by setting the parameters
2962    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2963    performance can be increased by more than a factor of 50.
2964 
2965    Collective on MPI_Comm
2966 
2967    Input Parameters:
2968 +  A - the matrix
2969 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2970            (same value is used for all local rows)
2971 .  d_nnz - array containing the number of nonzeros in the various rows of the
2972            DIAGONAL portion of the local submatrix (possibly different for each row)
2973            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2974            The size of this array is equal to the number of local rows, i.e 'm'.
2975            You must leave room for the diagonal entry even if it is zero.
2976 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2977            submatrix (same value is used for all local rows).
2978 -  o_nnz - array containing the number of nonzeros in the various rows of the
2979            OFF-DIAGONAL portion of the local submatrix (possibly different for
2980            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2981            structure. The size of this array is equal to the number
2982            of local rows, i.e 'm'.
2983 
2984    If the *_nnz parameter is given then the *_nz parameter is ignored
2985 
2986    The AIJ format (also called the Yale sparse matrix format or
2987    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2988    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2989 
2990    The parallel matrix is partitioned such that the first m0 rows belong to
2991    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2992    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
2993 
2994    The DIAGONAL portion of the local submatrix of a processor can be defined
2995    as the submatrix which is obtained by extracting the part corresponding
2996    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2997    first row that belongs to the processor, and r2 is the last row belonging
2998    to this processor. This is a square mxm matrix. The remaining portion
2999    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
3000 
3001    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3002 
3003    You can call MatGetInfo() to get information on how effective the preallocation was;
3004    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3005    You can also run with the option -info and look for messages with the string
3006    malloc in them to see if additional memory allocation was needed.
3007 
3008    Example usage:
3009 
3010    Consider the following 8x8 matrix with 34 non-zero values, that is
3011    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3012    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3013    as follows:
3014 
3015 .vb
3016             1  2  0  |  0  3  0  |  0  4
3017     Proc0   0  5  6  |  7  0  0  |  8  0
3018             9  0 10  | 11  0  0  | 12  0
3019     -------------------------------------
3020            13  0 14  | 15 16 17  |  0  0
3021     Proc1   0 18  0  | 19 20 21  |  0  0
3022             0  0  0  | 22 23  0  | 24  0
3023     -------------------------------------
3024     Proc2  25 26 27  |  0  0 28  | 29  0
3025            30  0  0  | 31 32 33  |  0 34
3026 .ve
3027 
3028    This can be represented as a collection of submatrices as:
3029 
3030 .vb
3031       A B C
3032       D E F
3033       G H I
3034 .ve
3035 
3036    Where the submatrices A,B,C are owned by proc0, D,E,F are
3037    owned by proc1, G,H,I are owned by proc2.
3038 
3039    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3040    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3041    The 'M','N' parameters are 8,8, and have the same values on all procs.
3042 
3043    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3044    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3045    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3046    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3047    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3048    matrix, and [DF] as another SeqAIJ matrix.
3049 
3050    When d_nz, o_nz parameters are specified, d_nz storage elements are
3051    allocated for every row of the local diagonal submatrix, and o_nz
3052    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3053    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3054    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3055    In this case, the values of d_nz,o_nz are:
3056 .vb
3057      proc0 : d_nz = 2, o_nz = 2
3058      proc1 : d_nz = 3, o_nz = 2
3059      proc2 : d_nz = 1, o_nz = 4
3060 .ve
3061    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3062    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3063    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3064    34 values.
3065 
3066    When d_nnz, o_nnz parameters are specified, the storage is specified
3067    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3068    In the above case the values for d_nnz,o_nnz are:
3069 .vb
3070      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3071      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3072      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3073 .ve
3074    Here the space allocated is the sum of all the above values, i.e. 34, and
3075    hence pre-allocation is perfect.
3076 
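   For the example above, a minimal sketch of the matching call on proc1 using the
   per-row counts (B and ierr are assumed to be declared and created elsewhere):

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};

     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
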
3077    Level: intermediate
3078 
3079 .keywords: matrix, aij, compressed row, sparse, parallel
3080 
3081 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3082           MPIAIJ, MatGetInfo()
3083 @*/
3084 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3085 {
3086   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3087 
3088   PetscFunctionBegin;
3089   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3090   if (f) {
3091     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3092   }
3093   PetscFunctionReturn(0);
3094 }
3095 
3096 #undef __FUNCT__
3097 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3098 /*@
3099      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3100          rows in standard CSR format.
3101 
3102    Collective on MPI_Comm
3103 
3104    Input Parameters:
3105 +  comm - MPI communicator
3106 .  m - number of local rows (Cannot be PETSC_DECIDE)
3107 .  n - This value should be the same as the local size used in creating the
3108        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
3109        calculated if N is given) For square matrices n is almost always m.
3110 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3111 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3112 .   i - row indices
3113 .   j - column indices
3114 -   a - matrix values
3115 
3116    Output Parameter:
3117 .   mat - the matrix
3118 
3119    Level: intermediate
3120 
3121    Notes:
3122        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3123      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3124      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3125 
3126        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3127 
3128        The format used for the sparse matrix input is equivalent to a
3129     row-major ordering, i.e. for the following matrix, the input data expected is
3130     as shown:
3131 
3132         1 0 0
3133         2 0 3     P0
3134        -------
3135         4 5 6     P1
3136 
3137      Process0 [P0]: rows_owned=[0,1]
3138         i =  {0,1,3}  [size = nrow+1  = 2+1]
3139         j =  {0,0,2}  [size = nz = 3]
3140         v =  {1,2,3}  [size = nz = 3]
3141 
3142      Process1 [P1]: rows_owned=[2]
3143         i =  {0,3}    [size = nrow+1  = 1+1]
3144         j =  {0,1,2}  [size = nz = 3]
3145         v =  {4,5,6}  [size = nz = 3]
3146 
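       A minimal sketch of the corresponding call on process P0 of the example above
     (ierr is assumed to be declared; the variable names are illustrative only):

.vb
     PetscInt    i[] = {0,1,3};
     PetscInt    j[] = {0,0,2};
     PetscScalar v[] = {1.0,2.0,3.0};
     Mat         A;

     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve
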
3147 .keywords: matrix, aij, compressed row, sparse, parallel
3148 
3149 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3150           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3151 @*/
3152 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3153 {
3154   PetscErrorCode ierr;
3155 
3156   PetscFunctionBegin;
3157   if (i[0]) {
3158     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3159   }
3160   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3161   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3162   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3163   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3164   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3165   PetscFunctionReturn(0);
3166 }
3167 
3168 #undef __FUNCT__
3169 #define __FUNCT__ "MatCreateMPIAIJ"
3170 /*@C
3171    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3172    (the default parallel PETSc format).  For good matrix assembly performance
3173    the user should preallocate the matrix storage by setting the parameters
3174    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3175    performance can be increased by more than a factor of 50.
3176 
3177    Collective on MPI_Comm
3178 
3179    Input Parameters:
3180 +  comm - MPI communicator
3181 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3182            This value should be the same as the local size used in creating the
3183            y vector for the matrix-vector product y = Ax.
3184 .  n - This value should be the same as the local size used in creating the
3185        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3186        calculated if N is given) For square matrices n is almost always m.
3187 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3188 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3189 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3190            (same value is used for all local rows)
3191 .  d_nnz - array containing the number of nonzeros in the various rows of the
3192            DIAGONAL portion of the local submatrix (possibly different for each row)
3193            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3194            The size of this array is equal to the number of local rows, i.e 'm'.
3195            You must leave room for the diagonal entry even if it is zero.
3196 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3197            submatrix (same value is used for all local rows).
3198 -  o_nnz - array containing the number of nonzeros in the various rows of the
3199            OFF-DIAGONAL portion of the local submatrix (possibly different for
3200            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3201            structure. The size of this array is equal to the number
3202            of local rows, i.e 'm'.
3203 
3204    Output Parameter:
3205 .  A - the matrix
3206 
3207    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3208    MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
3209    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3210    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3211 
3212    Notes:
3213    If the *_nnz parameter is given then the *_nz parameter is ignored
3214 
3215    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3216    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3217    storage requirements for this matrix.
3218 
3219    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3220    processor then it must be used on all processors that share the object for
3221    that argument.
3222 
3223    The user MUST specify either the local or global matrix dimensions
3224    (possibly both).
3225 
3226    The parallel matrix is partitioned across processors such that the
3227    first m0 rows belong to process 0, the next m1 rows belong to
3228    process 1, the next m2 rows belong to process 2 etc.. where
3229    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
3230    values corresponding to an [m x N] submatrix.
3231 
3232    The columns are logically partitioned with the n0 columns belonging
3233    to 0th partition, the next n1 columns belonging to the next
3234    partition etc., where n0,n1,n2... are the input parameter 'n'.
3235 
3236    The DIAGONAL portion of the local submatrix on any given processor
3237    is the submatrix corresponding to the rows and columns m,n
3238    corresponding to the given processor, i.e. the diagonal matrix on
3239    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
3240    etc. The remaining portion of the local submatrix [m x (N-n)]
3241    constitutes the OFF-DIAGONAL portion. The example below better
3242    illustrates this concept.
3243 
3244    For a square global matrix we define each processor's diagonal portion
3245    to be its local rows and the corresponding columns (a square submatrix);
3246    each processor's off-diagonal portion encompasses the remainder of the
3247    local matrix (a rectangular submatrix).
3248 
3249    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3250 
3251    When calling this routine with a single process communicator, a matrix of
3252    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3253    type of communicator, use the construction mechanism:
3254      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
3255 
3256    By default, this format uses inodes (identical nodes) when possible.
3257    We search for consecutive rows with the same nonzero structure, thereby
3258    reusing matrix information to achieve increased efficiency.
3259 
3260    Options Database Keys:
3261 +  -mat_no_inode  - Do not use inodes
3262 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3263 -  -mat_aij_oneindex - Internally use indexing starting at 1
3264         rather than 0.  Note that when calling MatSetValues(),
3265         the user still MUST index entries starting at 0!
3266 
3267 
3268    Example usage:
3269 
3270    Consider the following 8x8 matrix with 34 non-zero values, that is
3271    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3272    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3273    as follows:
3274 
3275 .vb
3276             1  2  0  |  0  3  0  |  0  4
3277     Proc0   0  5  6  |  7  0  0  |  8  0
3278             9  0 10  | 11  0  0  | 12  0
3279     -------------------------------------
3280            13  0 14  | 15 16 17  |  0  0
3281     Proc1   0 18  0  | 19 20 21  |  0  0
3282             0  0  0  | 22 23  0  | 24  0
3283     -------------------------------------
3284     Proc2  25 26 27  |  0  0 28  | 29  0
3285            30  0  0  | 31 32 33  |  0 34
3286 .ve
3287 
3288    This can be represented as a collection of submatrices as:
3289 
3290 .vb
3291       A B C
3292       D E F
3293       G H I
3294 .ve
3295 
3296    Where the submatrices A,B,C are owned by proc0, D,E,F are
3297    owned by proc1, G,H,I are owned by proc2.
3298 
3299    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3300    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3301    The 'M','N' parameters are 8,8, and have the same values on all procs.
3302 
3303    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3304    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3305    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3306    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3307    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3308    matrix, and [DF] as another SeqAIJ matrix.
3309 
3310    When d_nz, o_nz parameters are specified, d_nz storage elements are
3311    allocated for every row of the local diagonal submatrix, and o_nz
3312    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3313    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3314    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3315    In this case, the values of d_nz,o_nz are:
3316 .vb
3317      proc0 : d_nz = 2, o_nz = 2
3318      proc1 : d_nz = 3, o_nz = 2
3319      proc2 : d_nz = 1, o_nz = 4
3320 .ve
3321    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3322    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3323    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3324    34 values.
3325 
3326    When d_nnz, o_nnz parameters are specified, the storage is specified
3327    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3328    In the above case the values for d_nnz,o_nnz are:
3329 .vb
3330      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3331      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3332      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3333 .ve
3334    Here the space allocated is the sum of all the above values, i.e. 34, and
3335    hence pre-allocation is perfect.
3336 
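   For the example above, a minimal sketch of the corresponding call on proc0 using the
   per-row counts (ierr is assumed to be declared; the variable names are illustrative only):

.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     Mat      A;

     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
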
3337    Level: intermediate
3338 
3339 .keywords: matrix, aij, compressed row, sparse, parallel
3340 
3341 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3342           MPIAIJ, MatCreateMPIAIJWithArrays()
3343 @*/
3344 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3345 {
3346   PetscErrorCode ierr;
3347   PetscMPIInt    size;
3348 
3349   PetscFunctionBegin;
3350   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3351   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3352   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3353   if (size > 1) {
3354     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3355     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3356   } else {
3357     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3358     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3359   }
3360   PetscFunctionReturn(0);
3361 }
3362 
3363 #undef __FUNCT__
3364 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3365 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3366 {
3367   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3368 
3369   PetscFunctionBegin;
3370   *Ad     = a->A;
3371   *Ao     = a->B;
3372   *colmap = a->garray;
3373   PetscFunctionReturn(0);
3374 }
3375 
3376 #undef __FUNCT__
3377 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3378 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3379 {
3380   PetscErrorCode ierr;
3381   PetscInt       i;
3382   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3383 
3384   PetscFunctionBegin;
3385   if (coloring->ctype == IS_COLORING_GLOBAL) {
3386     ISColoringValue *allcolors,*colors;
3387     ISColoring      ocoloring;
3388 
3389     /* set coloring for diagonal portion */
3390     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3391 
3392     /* set coloring for off-diagonal portion */
3393     ierr = ISAllGatherColors(((PetscObject)A)->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3394     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3395     for (i=0; i<a->B->cmap.n; i++) {
3396       colors[i] = allcolors[a->garray[i]];
3397     }
3398     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3399     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3400     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3401     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3402   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3403     ISColoringValue *colors;
3404     PetscInt        *larray;
3405     ISColoring      ocoloring;
3406 
3407     /* set coloring for diagonal portion */
3408     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3409     for (i=0; i<a->A->cmap.n; i++) {
3410       larray[i] = i + A->cmap.rstart;
3411     }
3412     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3413     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3414     for (i=0; i<a->A->cmap.n; i++) {
3415       colors[i] = coloring->colors[larray[i]];
3416     }
3417     ierr = PetscFree(larray);CHKERRQ(ierr);
3418     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3419     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3420     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3421 
3422     /* set coloring for off-diagonal portion */
3423     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3424     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3425     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3426     for (i=0; i<a->B->cmap.n; i++) {
3427       colors[i] = coloring->colors[larray[i]];
3428     }
3429     ierr = PetscFree(larray);CHKERRQ(ierr);
3430     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3431     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3432     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3433   } else {
3434     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3435   }
3436 
3437   PetscFunctionReturn(0);
3438 }
3439 
3440 #if defined(PETSC_HAVE_ADIC)
3441 #undef __FUNCT__
3442 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3443 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3444 {
3445   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3446   PetscErrorCode ierr;
3447 
3448   PetscFunctionBegin;
3449   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3450   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3451   PetscFunctionReturn(0);
3452 }
3453 #endif
3454 
3455 #undef __FUNCT__
3456 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3457 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3458 {
3459   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3460   PetscErrorCode ierr;
3461 
3462   PetscFunctionBegin;
3463   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3464   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3465   PetscFunctionReturn(0);
3466 }
3467 
3468 #undef __FUNCT__
3469 #define __FUNCT__ "MatMerge"
3470 /*@
3471       MatMerge - Creates a single large PETSc matrix by concatenating sequential
3472                  matrices from each processor
3473 
3474     Collective on MPI_Comm
3475 
3476    Input Parameters:
3477 +    comm - the communicator the parallel matrix will live on
3478 .    inmat - the input sequential matrix (one per process)
3479 .    n - number of local columns (or PETSC_DECIDE)
3480 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3481 
3482    Output Parameter:
3483 .    outmat - the parallel matrix generated
3484 
3485     Level: advanced
3486 
3487    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3488 
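           The input matrix is destroyed by this routine (it calls MatDestroy() on inmat),
      so the caller should not use or destroy it afterwards. A minimal sketch of typical
      use, assuming each process has already built a SeqAIJ matrix Aseq with the same
      number of columns (names are illustrative only):

.vb
     Mat Ampi;
     ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
.ve
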
3489 @*/
3490 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3491 {
3492   PetscErrorCode ierr;
3493   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3494   PetscInt       *indx;
3495   PetscScalar    *values;
3496 
3497   PetscFunctionBegin;
3498   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3499   if (scall == MAT_INITIAL_MATRIX){
3500     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3501     if (n == PETSC_DECIDE){
3502       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3503     }
3504     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3505     rstart -= m;
3506 
3507     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3508     for (i=0;i<m;i++) {
3509       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3510       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3511       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3512     }
3513     /* This routine will ONLY return MPIAIJ type matrix */
3514     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3515     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3516     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3517     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3518     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3519 
3520   } else if (scall == MAT_REUSE_MATRIX){
3521     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3522   } else {
3523     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3524   }
3525 
3526   for (i=0;i<m;i++) {
3527     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3528     Ii    = i + rstart;
3529     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3530     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3531   }
3532   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3533   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3534   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3535 
3536   PetscFunctionReturn(0);
3537 }
3538 
3539 #undef __FUNCT__
3540 #define __FUNCT__ "MatFileSplit"
3541 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3542 {
3543   PetscErrorCode    ierr;
3544   PetscMPIInt       rank;
3545   PetscInt          m,N,i,rstart,nnz;
3546   size_t            len;
3547   const PetscInt    *indx;
3548   PetscViewer       out;
3549   char              *name;
3550   Mat               B;
3551   const PetscScalar *values;
3552 
3553   PetscFunctionBegin;
3554   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3555   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3556   /* Should this be the type of the diagonal block of A? */
3557   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3558   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3559   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3560   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3561   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3562   for (i=0;i<m;i++) {
3563     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3564     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3565     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3566   }
3567   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3568   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3569 
3570   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
3571   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3572   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3573   sprintf(name,"%s.%d",outfile,rank);
3574   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3575   ierr = PetscFree(name);CHKERRQ(ierr);
3576   ierr = MatView(B,out);CHKERRQ(ierr);
3577   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3578   ierr = MatDestroy(B);CHKERRQ(ierr);
3579   PetscFunctionReturn(0);
3580 }
3581 
3582 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3583 #undef __FUNCT__
3584 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3585 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3586 {
3587   PetscErrorCode       ierr;
3588   Mat_Merge_SeqsToMPI  *merge;
3589   PetscContainer       container;
3590 
3591   PetscFunctionBegin;
3592   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3593   if (container) {
3594     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3595     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3596     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3597     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3598     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3599     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3600     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3601     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3602     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3603     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3604     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3605     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3606 
3607     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3608     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3609   }
3610   ierr = PetscFree(merge);CHKERRQ(ierr);
3611 
3612   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3613   PetscFunctionReturn(0);
3614 }
3615 
3616 #include "src/mat/utils/freespace.h"
3617 #include "petscbt.h"
3618 
3619 #undef __FUNCT__
3620 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3621 /*@C
3622       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3623                  matrices from each processor
3624 
3625     Collective on MPI_Comm
3626 
3627    Input Parameters:
3628 +    comm - the communicator the parallel matrix will live on
3629 .    seqmat - the input sequential matrices
3630 .    m - number of local rows (or PETSC_DECIDE)
3631 .    n - number of local columns (or PETSC_DECIDE)
3632 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3633 
3634    Output Parameter:
3635 .    mpimat - the parallel matrix generated
3636 
3637     Level: advanced
3638 
3639    Notes:
3640      The dimensions of the sequential matrix in each processor MUST be the same.
3641      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3642      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
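
     A minimal sketch of typical use, assuming each process has built a local SeqAIJ
   matrix seqmat of identical dimensions (names are illustrative only):

.vb
     Mat mpimat;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
.ve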
3643 @*/
3644 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3645 {
3646   PetscErrorCode       ierr;
3647   MPI_Comm             comm=((PetscObject)mpimat)->comm;
3648   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3649   PetscMPIInt          size,rank,taga,*len_s;
3650   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3651   PetscInt             proc,m;
3652   PetscInt             **buf_ri,**buf_rj;
3653   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3654   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3655   MPI_Request          *s_waits,*r_waits;
3656   MPI_Status           *status;
3657   MatScalar            *aa=a->a;
3658   PetscScalar          **abuf_r,*ba_i;
3659   Mat_Merge_SeqsToMPI  *merge;
3660   PetscContainer       container;
3661 
3662   PetscFunctionBegin;
3663   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3664 
3665   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3666   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3667 
3668   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3669   if (container) {
3670     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3671   }
3672   bi     = merge->bi;
3673   bj     = merge->bj;
3674   buf_ri = merge->buf_ri;
3675   buf_rj = merge->buf_rj;
3676 
3677   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3678   owners = merge->rowmap.range;
3679   len_s  = merge->len_s;
3680 
3681   /* send and recv matrix values */
3682   /*-----------------------------*/
3683   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3684   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3685 
3686   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3687   for (proc=0,k=0; proc<size; proc++){
3688     if (!len_s[proc]) continue;
3689     i = owners[proc];
3690     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3691     k++;
3692   }
3693 
3694   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3695   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3696   ierr = PetscFree(status);CHKERRQ(ierr);
3697 
3698   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3699   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3700 
3701   /* insert mat values of mpimat */
3702   /*----------------------------*/
3703   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
3704   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3705   nextrow = buf_ri_k + merge->nrecv;
3706   nextai  = nextrow + merge->nrecv;
3707 
3708   for (k=0; k<merge->nrecv; k++){
3709     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3710     nrows = *(buf_ri_k[k]);
3711     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3712     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
3713   }
3714 
3715   /* set values of ba */
3716   m = merge->rowmap.n;
3717   for (i=0; i<m; i++) {
3718     arow = owners[rank] + i;
3719     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3720     bnzi = bi[i+1] - bi[i];
3721     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
3722 
3723     /* add local non-zero vals of this proc's seqmat into ba */
3724     anzi = ai[arow+1] - ai[arow];
3725     aj   = a->j + ai[arow];
3726     aa   = a->a + ai[arow];
3727     nextaj = 0;
3728     for (j=0; nextaj<anzi; j++){
3729       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3730         ba_i[j] += aa[nextaj++];
3731       }
3732     }
3733 
3734     /* add received vals into ba */
3735     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3736       /* i-th row */
3737       if (i == *nextrow[k]) {
3738         anzi = *(nextai[k]+1) - *nextai[k];
3739         aj   = buf_rj[k] + *(nextai[k]);
3740         aa   = abuf_r[k] + *(nextai[k]);
3741         nextaj = 0;
3742         for (j=0; nextaj<anzi; j++){
3743           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3744             ba_i[j] += aa[nextaj++];
3745           }
3746         }
3747         nextrow[k]++; nextai[k]++;
3748       }
3749     }
3750     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3751   }
3752   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3753   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3754 
3755   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3756   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3757   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3758   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3759   PetscFunctionReturn(0);
3760 }
3761 
3762 #undef __FUNCT__
3763 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3764 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3765 {
3766   PetscErrorCode       ierr;
3767   Mat                  B_mpi;
3768   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3769   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3770   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3771   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3772   PetscInt             len,proc,*dnz,*onz;
3773   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3774   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3775   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3776   MPI_Status           *status;
3777   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3778   PetscBT              lnkbt;
3779   Mat_Merge_SeqsToMPI  *merge;
3780   PetscContainer       container;
3781 
3782   PetscFunctionBegin;
3783   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3784 
3785   /* make sure it is a PETSc comm */
3786   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3787   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3788   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3789 
3790   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3791   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3792 
3793   /* determine row ownership */
3794   /*---------------------------------------------------------*/
3795   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3796   merge->rowmap.n = m;
3797   merge->rowmap.N = M;
3798   merge->rowmap.bs = 1;
3799   ierr = PetscMapSetUp(&merge->rowmap);CHKERRQ(ierr);
3800   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3801   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3802 
3803   m      = merge->rowmap.n;
3804   M      = merge->rowmap.N;
3805   owners = merge->rowmap.range;
3806 
3807   /* determine the number of messages to send, their lengths */
3808   /*---------------------------------------------------------*/
3809   len_s  = merge->len_s;
3810 
3811   len = 0;  /* length of buf_si[] */
3812   merge->nsend = 0;
3813   for (proc=0; proc<size; proc++){
3814     len_si[proc] = 0;
3815     if (proc == rank){
3816       len_s[proc] = 0;
3817     } else {
3818       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3819       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3820     }
3821     if (len_s[proc]) {
3822       merge->nsend++;
3823       nrows = 0;
3824       for (i=owners[proc]; i<owners[proc+1]; i++){
3825         if (ai[i+1] > ai[i]) nrows++;
3826       }
3827       len_si[proc] = 2*(nrows+1);
3828       len += len_si[proc];
3829     }
3830   }
3831 
3832   /* determine the number and length of messages to receive for ij-structure */
3833   /*-------------------------------------------------------------------------*/
3834   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3835   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3836 
3837   /* post the Irecv of j-structure */
3838   /*-------------------------------*/
3839   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3840   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3841 
3842   /* post the Isend of j-structure */
3843   /*--------------------------------*/
3844   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3845   sj_waits = si_waits + merge->nsend;
3846 
3847   for (proc=0, k=0; proc<size; proc++){
3848     if (!len_s[proc]) continue;
3849     i = owners[proc];
3850     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3851     k++;
3852   }
3853 
3854   /* receives and sends of j-structure are complete */
3855   /*------------------------------------------------*/
3856   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3857   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3858 
3859   /* send and recv i-structure */
3860   /*---------------------------*/
3861   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3862   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3863 
3864   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3865   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3866   for (proc=0,k=0; proc<size; proc++){
3867     if (!len_s[proc]) continue;
3868     /* form outgoing message for i-structure:
3869          buf_si[0]:                 nrows to be sent
3870                [1:nrows]:           row index (global)
3871                [nrows+1:2*nrows+1]: i-structure index
3872     */
3873     /*-------------------------------------------*/
3874     nrows = len_si[proc]/2 - 1;
3875     buf_si_i    = buf_si + nrows+1;
3876     buf_si[0]   = nrows;
3877     buf_si_i[0] = 0;
3878     nrows = 0;
3879     for (i=owners[proc]; i<owners[proc+1]; i++){
3880       anzi = ai[i+1] - ai[i];
3881       if (anzi) {
3882         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3883         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3884         nrows++;
3885       }
3886     }
3887     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3888     k++;
3889     buf_si += len_si[proc];
3890   }
3891 
3892   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3893   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3894 
3895   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3896   for (i=0; i<merge->nrecv; i++){
3897     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3898   }
3899 
3900   ierr = PetscFree(len_si);CHKERRQ(ierr);
3901   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3902   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3903   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3904   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3905   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3906   ierr = PetscFree(status);CHKERRQ(ierr);
3907 
3908   /* compute a local seq matrix in each processor */
3909   /*----------------------------------------------*/
3910   /* allocate bi array and free space for accumulating nonzero column info */
3911   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3912   bi[0] = 0;
3913 
3914   /* create and initialize a linked list */
3915   nlnk = N+1;
3916   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3917 
3918   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3919   len  = ai[owners[rank+1]] - ai[owners[rank]];
3921   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3922   current_space = free_space;
3923 
3924   /* determine symbolic info for each local row */
3925   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3926   nextrow = buf_ri_k + merge->nrecv;
3927   nextai  = nextrow + merge->nrecv;
3928   for (k=0; k<merge->nrecv; k++){
3929     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3930     nrows = *buf_ri_k[k];
3931     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3932     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
3933   }
3934 
3935   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3936   len = 0;
3937   for (i=0;i<m;i++) {
3938     bnzi   = 0;
3939     /* add local non-zero cols of this proc's seqmat into lnk */
3940     arow   = owners[rank] + i;
3941     anzi   = ai[arow+1] - ai[arow];
3942     aj     = a->j + ai[arow];
3943     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3944     bnzi += nlnk;
3945     /* add received col data into lnk */
3946     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3947       if (i == *nextrow[k]) { /* i-th row */
3948         anzi = *(nextai[k]+1) - *nextai[k];
3949         aj   = buf_rj[k] + *nextai[k];
3950         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3951         bnzi += nlnk;
3952         nextrow[k]++; nextai[k]++;
3953       }
3954     }
3955     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3956 
3957     /* if free space is not available, make more free space */
3958     if (current_space->local_remaining<bnzi) {
3959       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3960       nspacedouble++;
3961     }
3962     /* copy data into free space, then initialize lnk */
3963     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3964     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3965 
3966     current_space->array           += bnzi;
3967     current_space->local_used      += bnzi;
3968     current_space->local_remaining -= bnzi;
3969 
3970     bi[i+1] = bi[i] + bnzi;
3971   }
3972 
3973   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3974 
3975   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3976   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3977   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3978 
3979   /* create symbolic parallel matrix B_mpi */
3980   /*---------------------------------------*/
3981   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3982   if (n==PETSC_DECIDE) {
3983     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3984   } else {
3985     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3986   }
3987   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3988   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3989   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3990 
3991   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3992   B_mpi->assembled     = PETSC_FALSE;
3993   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3994   merge->bi            = bi;
3995   merge->bj            = bj;
3996   merge->buf_ri        = buf_ri;
3997   merge->buf_rj        = buf_rj;
3998   merge->coi           = PETSC_NULL;
3999   merge->coj           = PETSC_NULL;
4000   merge->owners_co     = PETSC_NULL;
4001 
4002   /* attach the supporting struct to B_mpi for reuse */
4003   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4004   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4005   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4006   *mpimat = B_mpi;
4007 
4008   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4009   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4010   PetscFunctionReturn(0);
4011 }
4012 
4013 #undef __FUNCT__
4014 #define __FUNCT__ "MatMerge_SeqsToMPI"
4015 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4016 {
4017   PetscErrorCode   ierr;
4018 
4019   PetscFunctionBegin;
4020   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4021   if (scall == MAT_INITIAL_MATRIX){
4022     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4023   }
4024   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
4025   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4026   PetscFunctionReturn(0);
4027 }
4028 
4029 #undef __FUNCT__
4030 #define __FUNCT__ "MatGetLocalMat"
4031 /*@
4032      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
4033 
4034     Not Collective
4035 
4036    Input Parameters:
4037 +    A - the matrix
4038 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4039 
4040    Output Parameter:
4041 .    A_loc - the local sequential matrix generated
4042 
4043     Level: developer
4044 
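    A minimal sketch of typical use (names are illustrative only):

.vb
     Mat Aloc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     /* ... after the numerical values of A change, refresh the local copy ... */
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
.ve
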
4045 @*/
4046 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4047 {
4048   PetscErrorCode  ierr;
4049   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
4050   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
4051   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
4052   MatScalar       *aa=a->a,*ba=b->a,*cam;
4053   PetscScalar     *ca;
4054   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
4055   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
4056 
4057   PetscFunctionBegin;
4058   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4059   if (scall == MAT_INITIAL_MATRIX){
4060     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
4061     ci[0] = 0;
4062     for (i=0; i<am; i++){
4063       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4064     }
4065     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
4066     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
4067     k = 0;
4068     for (i=0; i<am; i++) {
4069       ncols_o = bi[i+1] - bi[i];
4070       ncols_d = ai[i+1] - ai[i];
4071       /* off-diagonal portion of A: columns before the diagonal block */
4072       for (jo=0; jo<ncols_o; jo++) {
4073         col = cmap[*bj];
4074         if (col >= cstart) break;
4075         cj[k]   = col; bj++;
4076         ca[k++] = *ba++;
4077       }
4078       /* diagonal portion of A */
4079       for (j=0; j<ncols_d; j++) {
4080         cj[k]   = cstart + *aj++;
4081         ca[k++] = *aa++;
4082       }
4083       /* off-diagonal portion of A: columns after the diagonal block */
4084       for (j=jo; j<ncols_o; j++) {
4085         cj[k]   = cmap[*bj++];
4086         ca[k++] = *ba++;
4087       }
4088     }
4089     /* put together the new matrix */
4090     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4091     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4092     /* Since these are PETSc arrays, change flags to free them as necessary. */
4093     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4094     mat->free_a  = PETSC_TRUE;
4095     mat->free_ij = PETSC_TRUE;
4096     mat->nonew   = 0;
4097   } else if (scall == MAT_REUSE_MATRIX){
4098     mat=(Mat_SeqAIJ*)(*A_loc)->data;
4099     ci = mat->i; cj = mat->j; cam = mat->a;
4100     for (i=0; i<am; i++) {
4101       /* off-diagonal portion of A: columns before the diagonal block */
4102       ncols_o = bi[i+1] - bi[i];
4103       for (jo=0; jo<ncols_o; jo++) {
4104         col = cmap[*bj];
4105         if (col >= cstart) break;
4106         *cam++ = *ba++; bj++;
4107       }
4108       /* diagonal portion of A */
4109       ncols_d = ai[i+1] - ai[i];
4110       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
4111       /* off-diagonal portion of A: columns after the diagonal block */
4112       for (j=jo; j<ncols_o; j++) {
4113         *cam++ = *ba++; bj++;
4114       }
4115     }
4116   } else {
4117     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4118   }
4119 
4120   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4121   PetscFunctionReturn(0);
4122 }
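/*
   Usage sketch (hedged example; "Aloc" is a hypothetical variable, and surrounding
   declarations/error handling are assumed): extract the local rows of the MPIAIJ matrix A
   once, then refresh the values in place after A changes.

     Mat Aloc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ... use Aloc; later, after the values (not the pattern) of A change ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/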
4123 
4124 #undef __FUNCT__
4125 #define __FUNCT__ "MatGetLocalMatCondensed"
4126 /*@C
4127      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of an MPIAIJ matrix, keeping only its NON-ZERO columns
4128 
4129     Not Collective
4130 
4131    Input Parameters:
4132 +    A - the matrix
4133 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4134 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4135 
4136    Output Parameter:
4137 .    A_loc - the local sequential matrix generated
4138 
4139     Level: developer
4140 
4141 @*/
4142 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4143 {
4144   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4145   PetscErrorCode    ierr;
4146   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4147   IS                isrowa,iscola;
4148   Mat               *aloc;
4149 
4150   PetscFunctionBegin;
4151   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4152   if (!row){
4153     start = A->rmap.rstart; end = A->rmap.rend;
4154     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4155   } else {
4156     isrowa = *row;
4157   }
4158   if (!col){
4159     start = A->cmap.rstart;
4160     cmap  = a->garray;
4161     nzA   = a->A->cmap.n;
4162     nzB   = a->B->cmap.n;
4163     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4164     ncols = 0;
4165     for (i=0; i<nzB; i++) {
4166       if (cmap[i] < start) idx[ncols++] = cmap[i];
4167       else break;
4168     }
4169     imark = i;
4170     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4171     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4172     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4173     ierr = PetscFree(idx);CHKERRQ(ierr);
4174   } else {
4175     iscola = *col;
4176   }
4177   if (scall != MAT_INITIAL_MATRIX){
4178     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4179     aloc[0] = *A_loc;
4180   }
4181   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4182   *A_loc = aloc[0];
4183   ierr = PetscFree(aloc);CHKERRQ(ierr);
4184   if (!row){
4185     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4186   }
4187   if (!col){
4188     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4189   }
4190   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4191   PetscFunctionReturn(0);
4192 }
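/*
   Usage sketch (hedged example; "Aloc" is a hypothetical variable): passing PETSC_NULL for
   row and col lets the routine build the row and column index sets itself from the locally
   owned rows and the nonzero columns of A.

     Mat Aloc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
     ... use Aloc ...
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/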
4193 
4194 #undef __FUNCT__
4195 #define __FUNCT__ "MatGetBrowsOfAcols"
4196 /*@C
4197     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
4198 
4199     Collective on Mat
4200 
4201    Input Parameters:
4202 +    A,B - the matrices in mpiaij format
4203 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4204 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4205 
4206    Output Parameter:
4207 +    rowb, colb - index sets of rows and columns of B to extract
4208 .    brstart - index in B_seq of the first of the B->rmap.n rows taken from B's locally owned rows
4209 -    B_seq - the sequential matrix generated
4210 
4211     Level: developer
4212 
4213 @*/
4214 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4215 {
4216   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4217   PetscErrorCode    ierr;
4218   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4219   IS                isrowb,iscolb;
4220   Mat               *bseq;
4221 
4222   PetscFunctionBegin;
4223   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4224     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4225   }
4226   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4227 
4228   if (scall == MAT_INITIAL_MATRIX){
4229     start = A->cmap.rstart;
4230     cmap  = a->garray;
4231     nzA   = a->A->cmap.n;
4232     nzB   = a->B->cmap.n;
4233     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4234     ncols = 0;
4235     for (i=0; i<nzB; i++) {  /* rows of B below the locally owned rows */
4236       if (cmap[i] < start) idx[ncols++] = cmap[i];
4237       else break;
4238     }
4239     imark = i;
4240     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4241     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B above the locally owned rows */
4242     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4243     ierr = PetscFree(idx);CHKERRQ(ierr);
4244     *brstart = imark;
4245     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4246   } else {
4247     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4248     isrowb = *rowb; iscolb = *colb;
4249     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4250     bseq[0] = *B_seq;
4251   }
4252   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4253   *B_seq = bseq[0];
4254   ierr = PetscFree(bseq);CHKERRQ(ierr);
4255   if (!rowb){
4256     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4257   } else {
4258     *rowb = isrowb;
4259   }
4260   if (!colb){
4261     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4262   } else {
4263     *colb = iscolb;
4264   }
4265   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4266   PetscFunctionReturn(0);
4267 }
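/*
   Usage sketch (hedged example; rowb, colb, brstart and Bseq are hypothetical variables):
   the index sets created on the first call are returned so they can be reused when only the
   numerical values of B change.

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     ... later, same nonzero pattern of B, new values ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(Bseq);CHKERRQ(ierr);
*/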
4268 
4269 #undef __FUNCT__
4270 #define __FUNCT__ "MatGetBrowsOfAoCols"
4271 /*@C
4272     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
4273     of the OFF-DIAGONAL portion of the local part of A
4274 
4275     Collective on Mat
4276 
4277    Input Parameters:
4278 +    A,B - the matrices in mpiaij format
4279 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4280 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4281 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4282 
4283    Output Parameter:
4284 .    B_oth - the sequential matrix generated
4285 
4286     Level: developer
4287 
4288 @*/
4289 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4290 {
4291   VecScatter_MPI_General *gen_to,*gen_from;
4292   PetscErrorCode         ierr;
4293   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4294   Mat_SeqAIJ             *b_oth;
4295   VecScatter             ctx=a->Mvctx;
4296   MPI_Comm               comm=((PetscObject)ctx)->comm;
4297   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
4298   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4299   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4300   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4301   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4302   MPI_Status             *sstatus,rstatus;
4303   PetscMPIInt            jj;
4304   PetscInt               *cols,sbs,rbs;
4305   PetscScalar            *vals;
4306 
4307   PetscFunctionBegin;
4308   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4309     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4310   }
4311   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4312   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4313 
4314   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4315   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4316   rvalues  = gen_from->values; /* reused as scratch space for the incoming row lengths */
4317   svalues  = gen_to->values;   /* reused as scratch space for the outgoing row lengths */
4318   nrecvs   = gen_from->n;
4319   nsends   = gen_to->n;
4320 
4321   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4322   srow     = gen_to->indices;   /* local row index to be sent */
4323   sstarts  = gen_to->starts;
4324   sprocs   = gen_to->procs;
4325   sstatus  = gen_to->sstatus;
4326   sbs      = gen_to->bs;
4327   rstarts  = gen_from->starts;
4328   rprocs   = gen_from->procs;
4329   rbs      = gen_from->bs;
4330 
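  /* The exchange below reuses the Mvctx VecScatter's send/receive pattern and proceeds in
     three phases: (1) row lengths (i-array), (2) column indices (j-array), and (3) numerical
     values (a-array).  Phases (1) and (2) are needed only for MAT_INITIAL_MATRIX; for
     MAT_REUSE_MATRIX the saved sstartsj/rstartsj offsets and the bufa buffer are reused. */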
4331   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4332   if (scall == MAT_INITIAL_MATRIX){
4333     /* i-array */
4334     /*---------*/
4335     /*  post receives */
4336     for (i=0; i<nrecvs; i++){
4337       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4338       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4339       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4340     }
4341 
4342     /* pack the outgoing message */
4343     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4344     rstartsj = sstartsj + nsends +1;
4345     sstartsj[0] = 0;  rstartsj[0] = 0;
4346     len = 0; /* total length of j or a array to be sent */
4347     k = 0;
4348     for (i=0; i<nsends; i++){
4349       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4350       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4351       for (j=0; j<nrows; j++) {
4352         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4353         for (l=0; l<sbs; l++){
4354           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4355           rowlen[j*sbs+l] = ncols;
4356           len += ncols;
4357           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4358         }
4359         k++;
4360       }
4361       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4362       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4363     }
4364     /* recvs and sends of i-array are completed */
4365     i = nrecvs;
4366     while (i--) {
4367       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4368     }
4369     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4370 
4371     /* allocate buffers for sending j and a arrays */
4372     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4373     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4374 
4375     /* create i-array of B_oth */
4376     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4377     b_othi[0] = 0;
4378     len = 0; /* total length of j or a array to be received */
4379     k = 0;
4380     for (i=0; i<nrecvs; i++){
4381       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4382       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
4383       for (j=0; j<nrows; j++) {
4384         b_othi[k+1] = b_othi[k] + rowlen[j];
4385         len += rowlen[j]; k++;
4386       }
4387       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4388     }
4389 
4390     /* allocate space for j and a arrays of B_oth */
4391     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4392     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4393 
4394     /* j-array */
4395     /*---------*/
4396     /*  post receives of j-array */
4397     for (i=0; i<nrecvs; i++){
4398       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4399       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4400     }
4401 
4402     /* pack the outgoing message j-array */
4403     k = 0;
4404     for (i=0; i<nsends; i++){
4405       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4406       bufJ = bufj+sstartsj[i];
4407       for (j=0; j<nrows; j++) {
4408         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4409         for (ll=0; ll<sbs; ll++){
4410           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4411           for (l=0; l<ncols; l++){
4412             *bufJ++ = cols[l];
4413           }
4414           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4415         }
4416       }
4417       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4418     }
4419 
4420     /* recvs and sends of j-array are completed */
4421     i = nrecvs;
4422     while (i--) {
4423       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4424     }
4425     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4426   } else if (scall == MAT_REUSE_MATRIX){
4427     sstartsj = *startsj;
4428     rstartsj = sstartsj + nsends +1;
4429     bufa     = *bufa_ptr;
4430     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4431     b_otha   = b_oth->a;
4432   } else {
4433     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "MatReuse must be either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX");
4434   }
4435 
4436   /* a-array */
4437   /*---------*/
4438   /*  post receives of a-array */
4439   for (i=0; i<nrecvs; i++){
4440     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4441     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4442   }
4443 
4444   /* pack the outgoing message a-array */
4445   k = 0;
4446   for (i=0; i<nsends; i++){
4447     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4448     bufA = bufa+sstartsj[i];
4449     for (j=0; j<nrows; j++) {
4450       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4451       for (ll=0; ll<sbs; ll++){
4452         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4453         for (l=0; l<ncols; l++){
4454           *bufA++ = vals[l];
4455         }
4456         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4457       }
4458     }
4459     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4460   }
4461   /* recvs and sends of a-array are completed */
4462   i = nrecvs;
4463   while (i--) {
4464     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
4465   }
4466   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4467   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4468 
4469   if (scall == MAT_INITIAL_MATRIX){
4470     /* put together the new matrix */
4471     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4472 
4473     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4474     /* Since these are PETSc arrays, change flags to free them as necessary. */
4475     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4476     b_oth->free_a  = PETSC_TRUE;
4477     b_oth->free_ij = PETSC_TRUE;
4478     b_oth->nonew   = 0;
4479 
4480     ierr = PetscFree(bufj);CHKERRQ(ierr);
4481     if (!startsj || !bufa_ptr){
4482       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4483       ierr = PetscFree(bufa);CHKERRQ(ierr);
4484     } else {
4485       *startsj  = sstartsj;
4486       *bufa_ptr = bufa;
4487     }
4488   }
4489   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4490   PetscFunctionReturn(0);
4491 }
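/*
   Usage sketch (hedged example; startsj, bufa and Both are hypothetical variables): the
   message offsets and send buffer created on the first call are handed back so that only the
   a-array exchange needs to be repeated when B's numerical values change.

     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         Both;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
     ... later, same nonzero pattern of B, new values ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
     ierr = PetscFree(startsj);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(Both);CHKERRQ(ierr);
*/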
4492 
4493 #undef __FUNCT__
4494 #define __FUNCT__ "MatGetCommunicationStructs"
4495 /*@C
4496   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4497 
4498   Not Collective
4499 
4500   Input Parameters:
4501 . A - The matrix in mpiaij format
4502 
4503   Output Parameter:
4504 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4505 . colmap - A map from global column index to local index into lvec
4506 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4507 
4508   Level: developer
4509 
4510 @*/
4511 #if defined (PETSC_USE_CTABLE)
4512 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4513 #else
4514 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4515 #endif
4516 {
4517   Mat_MPIAIJ *a;
4518 
4519   PetscFunctionBegin;
4520   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4521   PetscValidPointer(lvec, 2);
4522   PetscValidPointer(colmap, 3);
4523   PetscValidPointer(multScatter, 4);
4524   a = (Mat_MPIAIJ *) A->data;
4525   if (lvec) *lvec = a->lvec;
4526   if (colmap) *colmap = a->colmap;
4527   if (multScatter) *multScatter = a->Mvctx;
4528   PetscFunctionReturn(0);
4529 }
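/*
   Usage sketch (hedged example; assumes PETSC_USE_CTABLE is NOT defined, so colmap comes back
   as a PetscInt array; lvec, colmap and Mvctx are hypothetical variables):

     Vec        lvec;
     PetscInt   *colmap;
     VecScatter Mvctx;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/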
4530 
4531 EXTERN_C_BEGIN
4532 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4533 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4534 EXTERN_C_END
4535 
4536 /*MC
4537    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4538 
4539    Options Database Keys:
4540 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4541 
4542   Level: beginner
4543 
4544 .seealso: MatCreateMPIAIJ()
4545 M*/
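/*
   Creation sketch (hedged example; M, N and the preallocation counts 5 and 2 are illustrative
   only, and error handling is assumed):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/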
4546 
4547 EXTERN_C_BEGIN
4548 #undef __FUNCT__
4549 #define __FUNCT__ "MatCreate_MPIAIJ"
4550 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4551 {
4552   Mat_MPIAIJ     *b;
4553   PetscErrorCode ierr;
4554   PetscMPIInt    size;
4555 
4556   PetscFunctionBegin;
4557   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
4558 
4559   ierr            = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
4560   B->data         = (void*)b;
4561   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4562   B->factor       = 0;
4563   B->rmap.bs      = 1;
4564   B->assembled    = PETSC_FALSE;
4565   B->mapping      = 0;
4566 
4567   B->insertmode      = NOT_SET_VALUES;
4568   b->size            = size;
4569   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
4570 
4571   /* build cache for off array entries formed */
4572   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
4573   b->donotstash  = PETSC_FALSE;
4574   b->colmap      = 0;
4575   b->garray      = 0;
4576   b->roworiented = PETSC_TRUE;
4577 
4578   /* stuff used for matrix vector multiply */
4579   b->lvec      = PETSC_NULL;
4580   b->Mvctx     = PETSC_NULL;
4581 
4582   /* stuff for MatGetRow() */
4583   b->rowindices   = 0;
4584   b->rowvalues    = 0;
4585   b->getrowactive = PETSC_FALSE;
4586 
4587 
4588   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4589                                      "MatStoreValues_MPIAIJ",
4590                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4591   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4592                                      "MatRetrieveValues_MPIAIJ",
4593                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4594   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4595 				     "MatGetDiagonalBlock_MPIAIJ",
4596                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4597   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4598 				     "MatIsTranspose_MPIAIJ",
4599 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4600   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4601 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4602 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4603   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4604 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4605 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4606   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4607 				     "MatDiagonalScaleLocal_MPIAIJ",
4608 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4609   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4610                                      "MatConvert_MPIAIJ_MPICSRPERM",
4611                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4612   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4613                                      "MatConvert_MPIAIJ_MPICRL",
4614                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4615   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4616   PetscFunctionReturn(0);
4617 }
4618 EXTERN_C_END
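/*
   Note (hedged sketch; d_nz, d_nnz, o_nz, o_nnz are hypothetical variables): the "..._C"
   functions composed in MatCreate_MPIAIJ() above are looked up at runtime with
   PetscObjectQueryFunction(); this is how, for example, the type-independent
   MatMPIAIJSetPreallocation() dispatches to MatMPIAIJSetPreallocation_MPIAIJ() when the
   matrix type is MATMPIAIJ:

     PetscErrorCode (*f)(Mat,PetscInt,PetscInt[],PetscInt,PetscInt[]) = PETSC_NULL;
     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
     if (f) { ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); }
*/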
4619 
4620 #undef __FUNCT__
4621 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4622 /*@
4623      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4624          and "off-diagonal" part of the matrix in CSR format.
4625 
4626    Collective on MPI_Comm
4627 
4628    Input Parameters:
4629 +  comm - MPI communicator
4630 .  m - number of local rows (Cannot be PETSC_DECIDE)
4631 .  n - This value should be the same as the local size used in creating the
4632        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4633        calculated if N is given). For square matrices n is almost always m.
4634 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4635 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4636 .   i - row indices for "diagonal" portion of matrix
4637 .   j - column indices
4638 .   a - matrix values
4639 .   oi - row indices for "off-diagonal" portion of matrix
4640 .   oj - column indices
4641 -   oa - matrix values
4642 
4643    Output Parameter:
4644 .   mat - the matrix
4645 
4646    Level: advanced
4647 
4648    Notes:
4649        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4650 
4651        The i and j indices are 0 based
4652 
4653        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4654 
4655 
4656 .keywords: matrix, aij, compressed row, sparse, parallel
4657 
4658 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4659           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4660 @*/
4661 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4662 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4663 {
4664   PetscErrorCode ierr;
4665   Mat_MPIAIJ     *maij;
4666 
4667   PetscFunctionBegin;
4668   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4669   if (i[0]) {
4670     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4671   }
4672   if (oi[0]) {
4673     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4674   }
4675   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4676   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4677   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4678   maij = (Mat_MPIAIJ*) (*mat)->data;
4679   maij->donotstash     = PETSC_TRUE;
4680   (*mat)->preallocated = PETSC_TRUE;
4681 
4682   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4683   ierr = PetscMapSetUp(&(*mat)->rmap);CHKERRQ(ierr);
4684   ierr = PetscMapSetUp(&(*mat)->cmap);CHKERRQ(ierr);
4685 
4686   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4687   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4688 
4689   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4690   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4691   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4692   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4693 
4694   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4695   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4696   PetscFunctionReturn(0);
4697 }
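/*
   Usage sketch (hedged example; m, n, i, j, a, oi, oj, oa are hypothetical variables holding
   the local CSR data): i/j/a describe the local "diagonal" block and oi/oj/oa the
   "off-diagonal" block; since the arrays are used directly (not copied), they must remain
   valid for the lifetime of the matrix.

     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/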
4698 
4699 /*
4700     Special version for direct calls from Fortran
4701 */
4702 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4703 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4704 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4705 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4706 #endif
4707 
4708 /* Change these macros so can be used in void function */
4709 #undef CHKERRQ
4710 #define CHKERRQ(ierr) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4711 #undef SETERRQ2
4712 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4713 #undef SETERRQ
4714 #define SETERRQ(ierr,b) CHKERRABORT(((PetscObject)mat)->comm,ierr)
4715 
4716 EXTERN_C_BEGIN
4717 #undef __FUNCT__
4718 #define __FUNCT__ "matsetvaluesmpiaij_"
4719 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4720 {
4721   Mat            mat = *mmat;
4722   PetscInt       m = *mm, n = *mn;
4723   InsertMode     addv = *maddv;
4724   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4725   PetscScalar    value;
4726   PetscErrorCode ierr;
4727 
4728   MatPreallocated(mat);
4729   if (mat->insertmode == NOT_SET_VALUES) {
4730     mat->insertmode = addv;
4731   }
4732 #if defined(PETSC_USE_DEBUG)
4733   else if (mat->insertmode != addv) {
4734     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4735   }
4736 #endif
4737   {
4738   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4739   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4740   PetscTruth     roworiented = aij->roworiented;
4741 
4742   /* Some Variables required in the macro */
4743   Mat            A = aij->A;
4744   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4745   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4746   PetscScalar    *aa = a->a;
4747   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4748   Mat            B = aij->B;
4749   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4750   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4751   PetscScalar    *ba = b->a;
4752 
4753   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4754   PetscInt       nonew = a->nonew;
4755   PetscScalar    *ap1,*ap2;
4756 
4757   PetscFunctionBegin;
4758   for (i=0; i<m; i++) {
4759     if (im[i] < 0) continue;
4760 #if defined(PETSC_USE_DEBUG)
4761     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4762 #endif
4763     if (im[i] >= rstart && im[i] < rend) {
4764       row      = im[i] - rstart;
4765       lastcol1 = -1;
4766       rp1      = aj + ai[row];
4767       ap1      = aa + ai[row];
4768       rmax1    = aimax[row];
4769       nrow1    = ailen[row];
4770       low1     = 0;
4771       high1    = nrow1;
4772       lastcol2 = -1;
4773       rp2      = bj + bi[row];
4774       ap2      = ba + bi[row];
4775       rmax2    = bimax[row];
4776       nrow2    = bilen[row];
4777       low2     = 0;
4778       high2    = nrow2;
4779 
4780       for (j=0; j<n; j++) {
4781         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4782         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4783         if (in[j] >= cstart && in[j] < cend){
4784           col = in[j] - cstart;
4785           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4786         } else if (in[j] < 0) continue;
4787 #if defined(PETSC_USE_DEBUG)
4788         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4789 #endif
4790         else {
4791           if (mat->was_assembled) {
4792             if (!aij->colmap) {
4793               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4794             }
4795 #if defined (PETSC_USE_CTABLE)
4796             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4797 	    col--;
4798 #else
4799             col = aij->colmap[in[j]] - 1;
4800 #endif
4801             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4802               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4803               col =  in[j];
4804               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4805               B = aij->B;
4806               b = (Mat_SeqAIJ*)B->data;
4807               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4808               rp2      = bj + bi[row];
4809               ap2      = ba + bi[row];
4810               rmax2    = bimax[row];
4811               nrow2    = bilen[row];
4812               low2     = 0;
4813               high2    = nrow2;
4814               bm       = aij->B->rmap.n;
4815               ba = b->a;
4816             }
4817           } else col = in[j];
4818           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4819         }
4820       }
4821     } else {
4822       if (!aij->donotstash) {
4823         if (roworiented) {
4824           if (ignorezeroentries && v[i*n] == 0.0) continue;
4825           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4826         } else {
4827           if (ignorezeroentries && v[i] == 0.0) continue;
4828           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4829         }
4830       }
4831     }
4832   }}
4833   PetscFunctionReturnVoid();
4834 }
4835 EXTERN_C_END
4836 
4837