1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
9 a slightly higher hash-table lookup cost; without it, it is not scalable (each processor
10 holds an order-N integer array, but access is fast).
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   int        n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(int));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
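/*
   Illustrative sketch (not compiled; gcol and lcol are hypothetical names) of how the
   colmap built above is queried later in this file.  With PETSC_USE_CTABLE the keys and
   values are stored shifted by one so that a return of 0 can mean "not present".

     int gcol = in[j];                 global column index
     int lcol;                         local index into the off-diagonal (B) part
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                           lcol < 0 means gcol is not present in B
   #else
     lcol = aij->colmap[gcol] - 1;     array variant: order-N storage per process
   #endif
*/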
34 
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (int*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(int));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (int*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(int));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
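/*
   Note on the two macros above (a summary based on the code itself): both insert
   (row,col,value) into the local SeqAIJ storage of either the diagonal block A or the
   off-diagonal block B.  The steps are identical:
     1) a binary search narrows the position to a window of at most 5 entries, then a
        linear scan finds the exact column;
     2) if the column already exists the value is added to or overwrites the old one;
     3) otherwise, if the row is full (nrow >= rmax), the entire i/j/a storage is
        reallocated with CHUNKSIZE extra slots for this row;
     4) later entries in the row are shifted up and the new entry is inserted,
        keeping each row sorted by column.
*/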
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar    value;
190   PetscErrorCode ierr;
191   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
192   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth     roworiented = aij->roworiented;
194 
195   /* Some variables required by the macros */
196   Mat            A = aij->A;
197   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
198   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar    *aa = a->a;
200   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat            B = aij->B;
202   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
203   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar    *ba = b->a;
205 
206   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   PetscInt       nonew = a->nonew,shift=0;
208   PetscScalar    *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
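/*
   Usage sketch (illustrative only; grow and gcol are hypothetical global indices):
   callers pass GLOBAL row/column indices.  Rows owned by this process are routed above
   into aij->A (columns inside [cstart,cend)) or aij->B (all other columns); rows owned
   elsewhere are stashed until assembly.

     PetscScalar one = 1.0;
     ierr = MatSetValues(mat,1,&grow,1,&gcol,&one,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/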
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   int        i,j,rstart = aij->rstart,rend = aij->rend;
278   int        cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   int         nstash,reallocs;
322   InsertMode  addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
348   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
349   PetscErrorCode ierr;
350   int            i,j,rstart,ncols,n,flg;
351   int            *row,*col,other_disassembled;
352   PetscScalar    *val;
353   InsertMode     addv = mat->insertmode;
354 
355   PetscFunctionBegin;
356   if (!aij->donotstash) {
357     while (1) {
358       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
359       if (!flg) break;
360 
361       for (i=0; i<n;) {
362         /* Now identify the consecutive vals belonging to the same row */
363         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
364         if (j < n) ncols = j-i;
365         else       ncols = n-i;
366         /* Now assemble all these values with a single function call */
367         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
368         i = j;
369       }
370     }
371     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
372   }
373 
374   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
375   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
376 
377   /* determine if any processor has disassembled; if so, we must
378      also disassemble ourselves, in order that we may reassemble. */
379   /*
380      if the nonzero structure of submatrix B cannot change then we know that
381      no processor disassembled, and thus we can skip this step
382   */
383   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
384     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
385     if (mat->was_assembled && !other_disassembled) {
386       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
387       /* reaccess the b because aij->B was changed */
388       b    = (Mat_SeqAIJ *)aij->B->data;
389     }
390   }
391 
392   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
393     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
394   }
395   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
396   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
397 
398   if (aij->rowvalues) {
399     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
400     aij->rowvalues = 0;
401   }
402 
403   /* used by MatAXPY() */
404   a->xtoy = 0; b->xtoy = 0;
405   a->XtoY = 0; b->XtoY = 0;
406 
407   PetscFunctionReturn(0);
408 }
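/*
   Summary of the assembly sequence implemented above: (1) drain the stash, replaying
   off-process entries through MatSetValues_MPIAIJ(); (2) assemble the diagonal block A;
   (3) if any process disassembled, disassemble here too so that all processes rebuild a
   consistent off-diagonal structure; (4) on the first final assembly, build the column
   map and scatter context via MatSetUpMultiply_MPIAIJ(); (5) assemble the off-diagonal
   block B.
*/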
409 
410 #undef __FUNCT__
411 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
412 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
413 {
414   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
415   PetscErrorCode ierr;
416 
417   PetscFunctionBegin;
418   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
419   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
420   PetscFunctionReturn(0);
421 }
422 
423 #undef __FUNCT__
424 #define __FUNCT__ "MatZeroRows_MPIAIJ"
425 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
426 {
427   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
428   PetscErrorCode ierr;
429   int            i,N,*rows,*owners = l->rowners,size = l->size;
430   int            *nprocs,j,idx,nsends,row;
431   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
432   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
433   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
434   MPI_Comm       comm = A->comm;
435   MPI_Request    *send_waits,*recv_waits;
436   MPI_Status     recv_status,*send_status;
437   IS             istmp;
438   PetscTruth     found;
439 
440   PetscFunctionBegin;
441   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
442   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
443 
444   /*  first count number of contributors to each processor */
445   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
446   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
447   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
448   for (i=0; i<N; i++) {
449     idx = rows[i];
450     found = PETSC_FALSE;
451     for (j=0; j<size; j++) {
452       if (idx >= owners[j] && idx < owners[j+1]) {
453         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
454       }
455     }
456     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
457   }
458   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
459 
460   /* inform other processors of number of messages and max length*/
461   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
462 
463   /* post receives:   */
464   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
465   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
466   for (i=0; i<nrecvs; i++) {
467     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
468   }
469 
470   /* do sends:
471       1) starts[i] gives the starting index in svalues for stuff going to
472          the ith processor
473   */
474   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
475   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
476   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
477   starts[0] = 0;
478   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
479   for (i=0; i<N; i++) {
480     svalues[starts[owner[i]]++] = rows[i];
481   }
482   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
483 
484   starts[0] = 0;
485   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
486   count = 0;
487   for (i=0; i<size; i++) {
488     if (nprocs[2*i+1]) {
489       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
490     }
491   }
492   ierr = PetscFree(starts);CHKERRQ(ierr);
493 
494   base = owners[rank];
495 
496   /*  wait on receives */
497   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
498   source = lens + nrecvs;
499   count  = nrecvs; slen = 0;
500   while (count) {
501     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
502     /* unpack receives into our local space */
503     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
504     source[imdex]  = recv_status.MPI_SOURCE;
505     lens[imdex]    = n;
506     slen          += n;
507     count--;
508   }
509   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
510 
511   /* gather the received row indices into the list of local rows to zero */
512   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
513   count = 0;
514   for (i=0; i<nrecvs; i++) {
515     values = rvalues + i*nmax;
516     for (j=0; j<lens[i]; j++) {
517       lrows[count++] = values[j] - base;
518     }
519   }
520   ierr = PetscFree(rvalues);CHKERRQ(ierr);
521   ierr = PetscFree(lens);CHKERRQ(ierr);
522   ierr = PetscFree(owner);CHKERRQ(ierr);
523   ierr = PetscFree(nprocs);CHKERRQ(ierr);
524 
525   /* actually zap the local rows */
526   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
527   PetscLogObjectParent(A,istmp);
528 
529   /*
530         Zero the required rows. If the "diagonal block" of the matrix
531      is square and the user wishes to set the diagonal, we use separate
532      code so that MatSetValues() is not called for each diagonal entry,
533      which would allocate new memory, causing many mallocs and slowing things down.
534 
535        Contributed by: Mathew Knepley
536   */
537   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
538   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
539   if (diag && (l->A->M == l->A->N)) {
540     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
541   } else if (diag) {
542     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
543     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
544       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
545 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
546     }
547     for (i = 0; i < slen; i++) {
548       row  = lrows[i] + rstart;
549       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
550     }
551     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
552     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553   } else {
554     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
555   }
556   ierr = ISDestroy(istmp);CHKERRQ(ierr);
557   ierr = PetscFree(lrows);CHKERRQ(ierr);
558 
559   /* wait on sends */
560   if (nsends) {
561     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
562     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
563     ierr = PetscFree(send_status);CHKERRQ(ierr);
564   }
565   ierr = PetscFree(send_waits);CHKERRQ(ierr);
566   ierr = PetscFree(svalues);CHKERRQ(ierr);
567 
568   PetscFunctionReturn(0);
569 }
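/*
   Communication pattern used above (summary): every process scans the rows it was
   given, determines the owning process of each from l->rowners, and sends those row
   indices there; each process then receives the rows it owns, zeros them in its local
   blocks l->B and l->A, and optionally reinserts the diagonal entries.
*/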
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatMult_MPIAIJ"
573 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
574 {
575   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
576   PetscErrorCode ierr;
577   int        nt;
578 
579   PetscFunctionBegin;
580   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
581   if (nt != A->n) {
582     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
583   }
584   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
586   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
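/*
   Illustrative picture of the multiply above: with x_local the owned part of xx and
   x_ghost the scattered ghost values in a->lvec,

       yy = A_d * x_local + A_o * x_ghost

   where A_d = a->A is the diagonal block and A_o = a->B is the off-diagonal block.
   The ghost scatter is started before, and finished after, the local A_d multiply so
   that communication overlaps computation.
*/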
590 
591 #undef __FUNCT__
592 #define __FUNCT__ "MatMultAdd_MPIAIJ"
593 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
594 {
595   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
596   PetscErrorCode ierr;
597 
598   PetscFunctionBegin;
599   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
601   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
603   PetscFunctionReturn(0);
604 }
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
608 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
609 {
610   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
611   PetscErrorCode ierr;
612 
613   PetscFunctionBegin;
614   /* do nondiagonal part */
615   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
616   /* send it on its way */
617   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
618   /* do local part */
619   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
620   /* receive remote parts: note this assumes the values are not actually */
621   /* inserted in yy until the next line, which is true for this implementation */
622   /* but may not always be true. */
623   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 EXTERN_C_BEGIN
628 #undef __FUNCT__
629 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
630 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
631 {
632   MPI_Comm comm;
633   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
634   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
635   IS         Me,Notme;
636   PetscErrorCode ierr;
637   int        M,N,first,last,*notme,ntids,i;
638 
639   PetscFunctionBegin;
640 
641   /* Easy test: the diagonal blocks must be transposes of each other */
642   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
643   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
644   if (!*f) PetscFunctionReturn(0);
645   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
646   ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
647   if (ntids==1) PetscFunctionReturn(0);
648 
649   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
650   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
651   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
652   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
653   for (i=0; i<first; i++) notme[i] = i;
654   for (i=last; i<M; i++) notme[i-last+first] = i;
655   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
656   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
657   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
658   Aoff = Aoffs[0];
659   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
660   Boff = Boffs[0];
661   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
662   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
663   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
664   ierr = ISDestroy(Me);CHKERRQ(ierr);
665   ierr = ISDestroy(Notme);CHKERRQ(ierr);
666 
667   PetscFunctionReturn(0);
668 }
669 EXTERN_C_END
670 
671 #undef __FUNCT__
672 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
673 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
674 {
675   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
676   PetscErrorCode ierr;
677 
678   PetscFunctionBegin;
679   /* do nondiagonal part */
680   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
681   /* send it on its way */
682   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
683   /* do local part */
684   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
685   /* receive remote parts: note this assumes the values are not actually */
686   /* inserted in zz until the next line, which is true for this implementation */
687   /* but may not always be true. */
688   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
689   PetscFunctionReturn(0);
690 }
691 
692 /*
693   This only works correctly for square matrices where the subblock A->A is the
694    diagonal block
695 */
696 #undef __FUNCT__
697 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
698 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
699 {
700   PetscErrorCode ierr;
701   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
702 
703   PetscFunctionBegin;
704   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
705   if (a->rstart != a->cstart || a->rend != a->cend) {
706     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
707   }
708   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 #undef __FUNCT__
713 #define __FUNCT__ "MatScale_MPIAIJ"
714 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
715 {
716   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
717   PetscErrorCode ierr;
718 
719   PetscFunctionBegin;
720   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
721   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatDestroy_MPIAIJ"
727 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
728 {
729   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
730   PetscErrorCode ierr;
731 
732   PetscFunctionBegin;
733 #if defined(PETSC_USE_LOG)
734   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
735 #endif
736   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
737   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
738   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
739   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
740 #if defined (PETSC_USE_CTABLE)
741   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
742 #else
743   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
744 #endif
745   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
746   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
747   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
748   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
749   ierr = PetscFree(aij);CHKERRQ(ierr);
750 
751   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
752   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
753   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
754   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
755   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
756   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
757   PetscFunctionReturn(0);
758 }
759 
760 #undef __FUNCT__
761 #define __FUNCT__ "MatView_MPIAIJ_Binary"
762 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
763 {
764   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
765   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
766   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
767   PetscErrorCode    ierr;
768   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
769   int               fd;
770   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
771   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
772   PetscScalar       *column_values;
773 
774   PetscFunctionBegin;
775   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
776   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
777   nz   = A->nz + B->nz;
778   if (!rank) {
779     header[0] = MAT_FILE_COOKIE;
780     header[1] = mat->M;
781     header[2] = mat->N;
782     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
783     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
784     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
785     /* get largest number of rows any processor has */
786     rlen = mat->m;
787     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
788     for (i=1; i<size; i++) {
789       rlen = PetscMax(rlen,range[i+1] - range[i]);
790     }
791   } else {
792     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
793     rlen = mat->m;
794   }
795 
796   /* load up the local row counts */
797   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
798   for (i=0; i<mat->m; i++) {
799     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
800   }
801 
802   /* store the row lengths to the file */
803   if (!rank) {
804     MPI_Status status;
805     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
806     for (i=1; i<size; i++) {
807       rlen = range[i+1] - range[i];
808       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
809       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
810     }
811   } else {
812     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
813   }
814   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
815 
816   /* load up the local column indices */
817   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
818   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
819   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
820   cnt  = 0;
821   for (i=0; i<mat->m; i++) {
822     for (j=B->i[i]; j<B->i[i+1]; j++) {
823       if ( (col = garray[B->j[j]]) > cstart) break;
824       column_indices[cnt++] = col;
825     }
826     for (k=A->i[i]; k<A->i[i+1]; k++) {
827       column_indices[cnt++] = A->j[k] + cstart;
828     }
829     for (; j<B->i[i+1]; j++) {
830       column_indices[cnt++] = garray[B->j[j]];
831     }
832   }
833   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
834 
835   /* store the column indices to the file */
836   if (!rank) {
837     MPI_Status status;
838     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
839     for (i=1; i<size; i++) {
840       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
841       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
842       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
843       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
844     }
845   } else {
846     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
847     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
848   }
849   ierr = PetscFree(column_indices);CHKERRQ(ierr);
850 
851   /* load up the local column values */
852   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
853   cnt  = 0;
854   for (i=0; i<mat->m; i++) {
855     for (j=B->i[i]; j<B->i[i+1]; j++) {
856       if ( garray[B->j[j]] > cstart) break;
857       column_values[cnt++] = B->a[j];
858     }
859     for (k=A->i[i]; k<A->i[i+1]; k++) {
860       column_values[cnt++] = A->a[k];
861     }
862     for (; j<B->i[i+1]; j++) {
863       column_values[cnt++] = B->a[j];
864     }
865   }
866   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
867 
868   /* store the column values to the file */
869   if (!rank) {
870     MPI_Status status;
871     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
872     for (i=1; i<size; i++) {
873       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
874       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
875       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
876       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
877     }
878   } else {
879     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
880     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
881   }
882   ierr = PetscFree(column_values);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
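/*
   Layout of the binary file written above (as produced by this routine):
     header[4]       = { MAT_FILE_COOKIE, M, N, global nz }
     row lengths     = one int per global row, written rank by rank
     column indices  = global column indices, row by row, with the diagonal-block
                       columns placed in their sorted position
     values          = the matching PetscScalar entries in the same order
   Only rank 0 writes to the file; the other ranks send their pieces with MPI_Send().
*/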
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
888 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
889 {
890   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
891   PetscErrorCode    ierr;
892   PetscMPIInt       rank = aij->rank,size = aij->size;
893   PetscTruth        isdraw,iascii,flg,isbinary;
894   PetscViewer       sviewer;
895   PetscViewerFormat format;
896 
897   PetscFunctionBegin;
898   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
899   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
900   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
901   if (iascii) {
902     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
903     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
904       MatInfo info;
905       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
906       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
907       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
908       if (flg) {
909         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
910 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
911       } else {
912         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
913 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
914       }
915       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
916       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
917       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
918       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
919       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
920       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
921       PetscFunctionReturn(0);
922     } else if (format == PETSC_VIEWER_ASCII_INFO) {
923       PetscFunctionReturn(0);
924     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
925       PetscFunctionReturn(0);
926     }
927   } else if (isbinary) {
928     if (size == 1) {
929       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
930       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
931     } else {
932       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
933     }
934     PetscFunctionReturn(0);
935   } else if (isdraw) {
936     PetscDraw  draw;
937     PetscTruth isnull;
938     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
939     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
940   }
941 
942   if (size == 1) {
943     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
944     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
945   } else {
946     /* assemble the entire matrix onto first processor. */
947     Mat         A;
948     Mat_SeqAIJ *Aloc;
949     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
950     PetscScalar *a;
951 
952     if (!rank) {
953       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
954     } else {
955       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
956     }
957     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
958     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
959     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
960     PetscLogObjectParent(mat,A);
961 
962     /* copy over the A part */
963     Aloc = (Mat_SeqAIJ*)aij->A->data;
964     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
965     row = aij->rstart;
966     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
967     for (i=0; i<m; i++) {
968       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
969       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
970     }
971     aj = Aloc->j;
972     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
973 
974     /* copy over the B part */
975     Aloc = (Mat_SeqAIJ*)aij->B->data;
976     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
977     row  = aij->rstart;
978     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
979     ct   = cols;
980     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
981     for (i=0; i<m; i++) {
982       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
983       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
984     }
985     ierr = PetscFree(ct);CHKERRQ(ierr);
986     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
987     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
988     /*
989        Everyone has to call to draw the matrix since the graphics waits are
990        synchronized across all processors that share the PetscDraw object
991     */
992     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
993     if (!rank) {
994       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
995       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
996     }
997     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
998     ierr = MatDestroy(A);CHKERRQ(ierr);
999   }
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatView_MPIAIJ"
1005 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1006 {
1007   PetscErrorCode ierr;
1008   PetscTruth iascii,isdraw,issocket,isbinary;
1009 
1010   PetscFunctionBegin;
1011   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1012   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1013   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1014   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1015   if (iascii || isdraw || isbinary || issocket) {
1016     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1017   } else {
1018     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1019   }
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatRelax_MPIAIJ"
1027 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1028 {
1029   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1030   PetscErrorCode ierr;
1031   Vec          bb1;
1032   PetscScalar  mone=-1.0;
1033 
1034   PetscFunctionBegin;
1035   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1036 
1037   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1038 
1039   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1040     if (flag & SOR_ZERO_INITIAL_GUESS) {
1041       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1042       its--;
1043     }
1044 
1045     while (its--) {
1046       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1047       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1048 
1049       /* update rhs: bb1 = bb - B*x */
1050       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1051       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1052 
1053       /* local sweep */
1054       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1055       CHKERRQ(ierr);
1056     }
1057   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1058     if (flag & SOR_ZERO_INITIAL_GUESS) {
1059       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1060       its--;
1061     }
1062     while (its--) {
1063       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1064       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1065 
1066       /* update rhs: bb1 = bb - B*x */
1067       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1068       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1069 
1070       /* local sweep */
1071       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1072       CHKERRQ(ierr);
1073     }
1074   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1075     if (flag & SOR_ZERO_INITIAL_GUESS) {
1076       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1077       its--;
1078     }
1079     while (its--) {
1080       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1081       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1082 
1083       /* update rhs: bb1 = bb - B*x */
1084       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1085       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1086 
1087       /* local sweep */
1088       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1089       CHKERRQ(ierr);
1090     }
1091   } else {
1092     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1093   }
1094 
1095   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
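/*
   Structure of the relaxation above (summary): only the "local" (processor-block) SOR
   variants are supported.  Each outer iteration scatters the current xx into mat->lvec,
   forms the updated right-hand side

       bb1 = bb - B * x_ghost

   (implemented by negating lvec and using B's multadd), and then applies the sequential
   SOR sweep of the requested direction to the diagonal block A.
*/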
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1101 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1102 {
1103   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1104   Mat        A = mat->A,B = mat->B;
1105   PetscErrorCode ierr;
1106   PetscReal  isend[5],irecv[5];
1107 
1108   PetscFunctionBegin;
1109   info->block_size     = 1.0;
1110   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1111   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1112   isend[3] = info->memory;  isend[4] = info->mallocs;
1113   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1114   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1115   isend[3] += info->memory;  isend[4] += info->mallocs;
1116   if (flag == MAT_LOCAL) {
1117     info->nz_used      = isend[0];
1118     info->nz_allocated = isend[1];
1119     info->nz_unneeded  = isend[2];
1120     info->memory       = isend[3];
1121     info->mallocs      = isend[4];
1122   } else if (flag == MAT_GLOBAL_MAX) {
1123     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1124     info->nz_used      = irecv[0];
1125     info->nz_allocated = irecv[1];
1126     info->nz_unneeded  = irecv[2];
1127     info->memory       = irecv[3];
1128     info->mallocs      = irecv[4];
1129   } else if (flag == MAT_GLOBAL_SUM) {
1130     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1131     info->nz_used      = irecv[0];
1132     info->nz_allocated = irecv[1];
1133     info->nz_unneeded  = irecv[2];
1134     info->memory       = irecv[3];
1135     info->mallocs      = irecv[4];
1136   }
1137   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1138   info->fill_ratio_needed = 0;
1139   info->factor_mallocs    = 0;
1140   info->rows_global       = (double)matin->M;
1141   info->columns_global    = (double)matin->N;
1142   info->rows_local        = (double)matin->m;
1143   info->columns_local     = (double)matin->N;
1144 
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatSetOption_MPIAIJ"
1150 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1151 {
1152   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1153   PetscErrorCode ierr;
1154 
1155   PetscFunctionBegin;
1156   switch (op) {
1157   case MAT_NO_NEW_NONZERO_LOCATIONS:
1158   case MAT_YES_NEW_NONZERO_LOCATIONS:
1159   case MAT_COLUMNS_UNSORTED:
1160   case MAT_COLUMNS_SORTED:
1161   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1162   case MAT_KEEP_ZEROED_ROWS:
1163   case MAT_NEW_NONZERO_LOCATION_ERR:
1164   case MAT_USE_INODES:
1165   case MAT_DO_NOT_USE_INODES:
1166   case MAT_IGNORE_ZERO_ENTRIES:
1167     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1168     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1169     break;
1170   case MAT_ROW_ORIENTED:
1171     a->roworiented = PETSC_TRUE;
1172     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1173     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1174     break;
1175   case MAT_ROWS_SORTED:
1176   case MAT_ROWS_UNSORTED:
1177   case MAT_YES_NEW_DIAGONALS:
1178     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1179     break;
1180   case MAT_COLUMN_ORIENTED:
1181     a->roworiented = PETSC_FALSE;
1182     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1183     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1184     break;
1185   case MAT_IGNORE_OFF_PROC_ENTRIES:
1186     a->donotstash = PETSC_TRUE;
1187     break;
1188   case MAT_NO_NEW_DIAGONALS:
1189     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1190   case MAT_SYMMETRIC:
1191   case MAT_STRUCTURALLY_SYMMETRIC:
1192   case MAT_HERMITIAN:
1193   case MAT_SYMMETRY_ETERNAL:
1194     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1195     break;
1196   case MAT_NOT_SYMMETRIC:
1197   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1198   case MAT_NOT_HERMITIAN:
1199   case MAT_NOT_SYMMETRY_ETERNAL:
1200     break;
1201   default:
1202     SETERRQ(PETSC_ERR_SUP,"unknown option");
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatGetRow_MPIAIJ"
1209 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1210 {
1211   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1212   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1213   PetscErrorCode ierr;
1214   int          i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1215   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1216   int          *cmap,*idx_p;
1217 
1218   PetscFunctionBegin;
1219   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1220   mat->getrowactive = PETSC_TRUE;
1221 
1222   if (!mat->rowvalues && (idx || v)) {
1223     /*
1224         allocate enough space to hold information from the longest row.
1225     */
1226     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1227     int     max = 1,tmp;
1228     for (i=0; i<matin->m; i++) {
1229       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1230       if (max < tmp) { max = tmp; }
1231     }
1232     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1233     mat->rowindices = (int*)(mat->rowvalues + max);
1234   }
1235 
1236   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1237   lrow = row - rstart;
1238 
1239   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1240   if (!v)   {pvA = 0; pvB = 0;}
1241   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1242   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1243   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1244   nztot = nzA + nzB;
1245 
1246   cmap  = mat->garray;
1247   if (v  || idx) {
1248     if (nztot) {
1249       /* Sort by increasing column numbers, assuming A and B already sorted */
1250       int imark = -1;
1251       if (v) {
1252         *v = v_p = mat->rowvalues;
1253         for (i=0; i<nzB; i++) {
1254           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1255           else break;
1256         }
1257         imark = i;
1258         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1259         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1260       }
1261       if (idx) {
1262         *idx = idx_p = mat->rowindices;
1263         if (imark > -1) {
1264           for (i=0; i<imark; i++) {
1265             idx_p[i] = cmap[cworkB[i]];
1266           }
1267         } else {
1268           for (i=0; i<nzB; i++) {
1269             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1270             else break;
1271           }
1272           imark = i;
1273         }
1274         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1275         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1276       }
1277     } else {
1278       if (idx) *idx = 0;
1279       if (v)   *v   = 0;
1280     }
1281   }
1282   *nz = nztot;
1283   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1284   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1290 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1291 {
1292   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1293 
1294   PetscFunctionBegin;
1295   if (aij->getrowactive == PETSC_FALSE) {
1296     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1297   }
1298   aij->getrowactive = PETSC_FALSE;
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatNorm_MPIAIJ"
1304 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1305 {
1306   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1307   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1308   PetscErrorCode ierr;
1309   int          i,j,cstart = aij->cstart;
1310   PetscReal    sum = 0.0;
1311   PetscScalar  *v;
1312 
1313   PetscFunctionBegin;
1314   if (aij->size == 1) {
1315     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1316   } else {
1317     if (type == NORM_FROBENIUS) {
1318       v = amat->a;
1319       for (i=0; i<amat->nz; i++) {
1320 #if defined(PETSC_USE_COMPLEX)
1321         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1322 #else
1323         sum += (*v)*(*v); v++;
1324 #endif
1325       }
1326       v = bmat->a;
1327       for (i=0; i<bmat->nz; i++) {
1328 #if defined(PETSC_USE_COMPLEX)
1329         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1330 #else
1331         sum += (*v)*(*v); v++;
1332 #endif
1333       }
1334       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1335       *norm = sqrt(*norm);
1336     } else if (type == NORM_1) { /* max column norm */
1337       PetscReal *tmp,*tmp2;
1338       int    *jj,*garray = aij->garray;
1339       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1340       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1341       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1342       *norm = 0.0;
1343       v = amat->a; jj = amat->j;
1344       for (j=0; j<amat->nz; j++) {
1345         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1346       }
1347       v = bmat->a; jj = bmat->j;
1348       for (j=0; j<bmat->nz; j++) {
1349         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1350       }
1351       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1352       for (j=0; j<mat->N; j++) {
1353         if (tmp2[j] > *norm) *norm = tmp2[j];
1354       }
1355       ierr = PetscFree(tmp);CHKERRQ(ierr);
1356       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1357     } else if (type == NORM_INFINITY) { /* max row norm */
1358       PetscReal ntemp = 0.0;
1359       for (j=0; j<aij->A->m; j++) {
1360         v = amat->a + amat->i[j];
1361         sum = 0.0;
1362         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1363           sum += PetscAbsScalar(*v); v++;
1364         }
1365         v = bmat->a + bmat->i[j];
1366         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1367           sum += PetscAbsScalar(*v); v++;
1368         }
1369         if (sum > ntemp) ntemp = sum;
1370       }
1371       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1372     } else {
1373       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1374     }
1375   }
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatTranspose_MPIAIJ"
1381 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1382 {
1383   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1384   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1385   PetscErrorCode ierr;
1386   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1387   Mat          B;
1388   PetscScalar  *array;
1389 
1390   PetscFunctionBegin;
1391   if (!matout && M != N) {
1392     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1393   }
1394 
1395   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1396   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1397   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1398 
1399   /* copy over the A part */
1400   Aloc = (Mat_SeqAIJ*)a->A->data;
1401   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1402   row = a->rstart;
1403   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1404   for (i=0; i<m; i++) {
1405     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1406     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1407   }
1408   aj = Aloc->j;
1409   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1410 
1411   /* copy over the B part */
1412   Aloc = (Mat_SeqAIJ*)a->B->data;
1413   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1414   row  = a->rstart;
1415   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1416   ct   = cols;
1417   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1418   for (i=0; i<m; i++) {
1419     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1420     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1421   }
1422   ierr = PetscFree(ct);CHKERRQ(ierr);
1423   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1424   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1425   if (matout) {
1426     *matout = B;
1427   } else {
1428     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1429   }
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1435 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1436 {
1437   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1438   Mat        a = aij->A,b = aij->B;
1439   PetscErrorCode ierr;
1440   int        s1,s2,s3;
1441 
1442   PetscFunctionBegin;
1443   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1444   if (rr) {
1445     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1446     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1447     /* Overlap communication with computation. */
1448     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1449   }
1450   if (ll) {
1451     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1452     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1453     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1454   }
1455   /* scale  the diagonal block */
1456   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1457 
1458   if (rr) {
1459     /* Do a scatter end and then right scale the off-diagonal block */
1460     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1461     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1462   }
1463 
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1470 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1471 {
1472   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1473   PetscErrorCode ierr;
1474 
1475   PetscFunctionBegin;
1476   if (!a->rank) {
1477     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1478   }
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1484 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1485 {
1486   PetscFunctionBegin;
1487   *bs = 1;
1488   PetscFunctionReturn(0);
1489 }
1490 #undef __FUNCT__
1491 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1492 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1493 {
1494   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1495   PetscErrorCode ierr;
1496 
1497   PetscFunctionBegin;
1498   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatEqual_MPIAIJ"
1504 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1505 {
1506   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1507   Mat        a,b,c,d;
1508   PetscTruth flg;
1509   PetscErrorCode ierr;
1510 
1511   PetscFunctionBegin;
1512   a = matA->A; b = matA->B;
1513   c = matB->A; d = matB->B;
1514 
1515   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1516   if (flg == PETSC_TRUE) {
1517     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1518   }
1519   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 #undef __FUNCT__
1524 #define __FUNCT__ "MatCopy_MPIAIJ"
1525 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1526 {
1527   PetscErrorCode ierr;
1528   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1529   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1530 
1531   PetscFunctionBegin;
1532   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1533   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1534     /* because of the column compression in the off-processor part of the matrix a->B,
1535        the number of columns in a->B and b->B may differ, hence we cannot call
1536        MatCopy() directly on the two parts. If need be, we could provide a copy more
1537        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1538        then copying the submatrices */
1539     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1540   } else {
1541     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1542     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1543   }
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1549 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1550 {
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #include "petscblaslapack.h"
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatAXPY_MPIAIJ"
1561 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1562 {
1563   PetscErrorCode ierr;
1564   int            i;
1565   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1566   PetscBLASInt   bnz,one=1;
1567   Mat_SeqAIJ     *x,*y;
1568 
1569   PetscFunctionBegin;
1570   if (str == SAME_NONZERO_PATTERN) {
1571     x = (Mat_SeqAIJ *)xx->A->data;
1572     y = (Mat_SeqAIJ *)yy->A->data;
1573     bnz = (PetscBLASInt)x->nz;
1574     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1575     x = (Mat_SeqAIJ *)xx->B->data;
1576     y = (Mat_SeqAIJ *)yy->B->data;
1577     bnz = (PetscBLASInt)x->nz;
1578     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1579   } else if (str == SUBSET_NONZERO_PATTERN) {
1580     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1581 
1582     x = (Mat_SeqAIJ *)xx->B->data;
1583     y = (Mat_SeqAIJ *)yy->B->data;
1584     if (y->xtoy && y->XtoY != xx->B) {
1585       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1586       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1587     }
1588     if (!y->xtoy) { /* get xtoy */
1589       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1590       y->XtoY = xx->B;
1591     }
1592     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1593   } else {
1594     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1595   }
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 /* -------------------------------------------------------------------*/
1600 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1601        MatGetRow_MPIAIJ,
1602        MatRestoreRow_MPIAIJ,
1603        MatMult_MPIAIJ,
1604 /* 4*/ MatMultAdd_MPIAIJ,
1605        MatMultTranspose_MPIAIJ,
1606        MatMultTransposeAdd_MPIAIJ,
1607        0,
1608        0,
1609        0,
1610 /*10*/ 0,
1611        0,
1612        0,
1613        MatRelax_MPIAIJ,
1614        MatTranspose_MPIAIJ,
1615 /*15*/ MatGetInfo_MPIAIJ,
1616        MatEqual_MPIAIJ,
1617        MatGetDiagonal_MPIAIJ,
1618        MatDiagonalScale_MPIAIJ,
1619        MatNorm_MPIAIJ,
1620 /*20*/ MatAssemblyBegin_MPIAIJ,
1621        MatAssemblyEnd_MPIAIJ,
1622        0,
1623        MatSetOption_MPIAIJ,
1624        MatZeroEntries_MPIAIJ,
1625 /*25*/ MatZeroRows_MPIAIJ,
1626 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1627        MatLUFactorSymbolic_MPIAIJ_TFS,
1628 #else
1629        0,
1630 #endif
1631        0,
1632        0,
1633        0,
1634 /*30*/ MatSetUpPreallocation_MPIAIJ,
1635        0,
1636        0,
1637        0,
1638        0,
1639 /*35*/ MatDuplicate_MPIAIJ,
1640        0,
1641        0,
1642        0,
1643        0,
1644 /*40*/ MatAXPY_MPIAIJ,
1645        MatGetSubMatrices_MPIAIJ,
1646        MatIncreaseOverlap_MPIAIJ,
1647        MatGetValues_MPIAIJ,
1648        MatCopy_MPIAIJ,
1649 /*45*/ MatPrintHelp_MPIAIJ,
1650        MatScale_MPIAIJ,
1651        0,
1652        0,
1653        0,
1654 /*50*/ MatGetBlockSize_MPIAIJ,
1655        0,
1656        0,
1657        0,
1658        0,
1659 /*55*/ MatFDColoringCreate_MPIAIJ,
1660        0,
1661        MatSetUnfactored_MPIAIJ,
1662        0,
1663        0,
1664 /*60*/ MatGetSubMatrix_MPIAIJ,
1665        MatDestroy_MPIAIJ,
1666        MatView_MPIAIJ,
1667        MatGetPetscMaps_Petsc,
1668        0,
1669 /*65*/ 0,
1670        0,
1671        0,
1672        0,
1673        0,
1674 /*70*/ 0,
1675        0,
1676        MatSetColoring_MPIAIJ,
1677        MatSetValuesAdic_MPIAIJ,
1678        MatSetValuesAdifor_MPIAIJ,
1679 /*75*/ 0,
1680        0,
1681        0,
1682        0,
1683        0,
1684 /*80*/ 0,
1685        0,
1686        0,
1687        0,
1688 /*84*/ MatLoad_MPIAIJ,
1689        0,
1690        0,
1691        0,
1692        0,
1693        0,
1694 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1695        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1696        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1697        MatPtAP_MPIAIJ_MPIAIJ,
1698        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1699 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1700        0,
1701        0,
1702        0};
1703 
1704 /* ----------------------------------------------------------------------------------------*/
1705 
1706 EXTERN_C_BEGIN
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1709 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1710 {
1711   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1712   PetscErrorCode ierr;
1713 
1714   PetscFunctionBegin;
1715   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1716   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 EXTERN_C_END
1720 
1721 EXTERN_C_BEGIN
1722 #undef __FUNCT__
1723 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1724 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1725 {
1726   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1727   PetscErrorCode ierr;
1728 
1729   PetscFunctionBegin;
1730   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1731   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1732   PetscFunctionReturn(0);
1733 }
1734 EXTERN_C_END
1735 
1736 #include "petscpc.h"
1737 EXTERN_C_BEGIN
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1740 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1741 {
1742   Mat_MPIAIJ   *b;
1743   PetscErrorCode ierr;
1744   int          i;
1745 
1746   PetscFunctionBegin;
1747   B->preallocated = PETSC_TRUE;
1748   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1749   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1750   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1751   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1752   if (d_nnz) {
1753     for (i=0; i<B->m; i++) {
1754       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1755     }
1756   }
1757   if (o_nnz) {
1758     for (i=0; i<B->m; i++) {
1759       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1760     }
1761   }
1762   b = (Mat_MPIAIJ*)B->data;
1763   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1764   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1765 
1766   PetscFunctionReturn(0);
1767 }
1768 EXTERN_C_END
1769 
1770 /*MC
1771    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1772 
1773    Options Database Keys:
1774 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1775 
1776   Level: beginner
1777 
1778 .seealso: MatCreateMPIAIJ
1779 M*/
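/*
   Editorial sketch (not part of the library): one way the option key above might be used.
   The sizes and the function name are illustrative assumptions; run the program with
   "-mat_type mpiaij" so that MatSetFromOptions() selects this type.
*/
#if 0
PetscErrorCode ExampleSelectMPIAIJFromOptions(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,8,8,A);CHKERRQ(ierr); /* global 8x8, local sizes decided by PETSc */
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);                           /* honours -mat_type mpiaij                 */
  /* ... preallocate, MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() as usual ... */
  PetscFunctionReturn(0);
}
#endif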
1780 
1781 EXTERN_C_BEGIN
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatCreate_MPIAIJ"
1784 PetscErrorCode MatCreate_MPIAIJ(Mat B)
1785 {
1786   Mat_MPIAIJ *b;
1787   PetscErrorCode ierr;
1788   int        i,size;
1789 
1790   PetscFunctionBegin;
1791   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1792 
1793   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1794   B->data         = (void*)b;
1795   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1796   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1797   B->factor       = 0;
1798   B->assembled    = PETSC_FALSE;
1799   B->mapping      = 0;
1800 
1801   B->insertmode      = NOT_SET_VALUES;
1802   b->size            = size;
1803   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1804 
1805   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1806   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1807 
1808   /* the information in the maps duplicates the information computed below, eventually
1809      we should remove the duplicate information that is not contained in the maps */
1810   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1811   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1812 
1813   /* build local table of row and column ownerships */
1814   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1815   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1816   b->cowners = b->rowners + b->size + 2;
1817   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1818   b->rowners[0] = 0;
1819   for (i=2; i<=b->size; i++) {
1820     b->rowners[i] += b->rowners[i-1];
1821   }
1822   b->rstart = b->rowners[b->rank];
1823   b->rend   = b->rowners[b->rank+1];
1824   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1825   b->cowners[0] = 0;
1826   for (i=2; i<=b->size; i++) {
1827     b->cowners[i] += b->cowners[i-1];
1828   }
1829   b->cstart = b->cowners[b->rank];
1830   b->cend   = b->cowners[b->rank+1];
1831 
1832   /* build cache for off array entries formed */
1833   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1834   b->donotstash  = PETSC_FALSE;
1835   b->colmap      = 0;
1836   b->garray      = 0;
1837   b->roworiented = PETSC_TRUE;
1838 
1839   /* stuff used for matrix vector multiply */
1840   b->lvec      = PETSC_NULL;
1841   b->Mvctx     = PETSC_NULL;
1842 
1843   /* stuff for MatGetRow() */
1844   b->rowindices   = 0;
1845   b->rowvalues    = 0;
1846   b->getrowactive = PETSC_FALSE;
1847 
1848   /* Explicitly create 2 MATSEQAIJ matrices. */
1849   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1850   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1851   PetscLogObjectParent(B,b->A);
1852   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1853   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1854   PetscLogObjectParent(B,b->B);
1855 
1856   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1857                                      "MatStoreValues_MPIAIJ",
1858                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1859   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1860                                      "MatRetrieveValues_MPIAIJ",
1861                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1862   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1863 				     "MatGetDiagonalBlock_MPIAIJ",
1864                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1865   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1866 				     "MatIsTranspose_MPIAIJ",
1867 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
1868   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1869 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1870 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
1871   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1872 				     "MatDiagonalScaleLocal_MPIAIJ",
1873 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
1874   PetscFunctionReturn(0);
1875 }
1876 EXTERN_C_END
1877 
1878 /*MC
1879    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1880 
1881    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1882    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1883    MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1884    for communicators controlling multiple processes.  It is recommended that you call both of
1885    the above preallocation routines for simplicity.
1886 
1887    Options Database Keys:
1888 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1889 
1890   Level: beginner
1891 
1892 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1893 M*/
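/*
   Editorial sketch (not part of the library): following the recommendation above, code that
   must run on both single- and multi-process communicators can call both preallocation
   routines; the one that does not match the actual type is silently ignored.  The nonzero
   counts and the function name are illustrative assumptions.
*/
#if 0
PetscErrorCode ExampleCreateAIJAnySize(MPI_Comm comm,int m,int n,int M,int N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);                                   /* becomes MATSEQAIJ or MATMPIAIJ     */
  ierr = MatSeqAIJSetPreallocation(*A,5,PETSC_NULL);CHKERRQ(ierr);              /* used on a one-process communicator */
  ierr = MatMPIAIJSetPreallocation(*A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr); /* used otherwise                     */
  PetscFunctionReturn(0);
}
#endif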
1894 
1895 EXTERN_C_BEGIN
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatCreate_AIJ"
1898 PetscErrorCode MatCreate_AIJ(Mat A)
1899 {
1900   PetscErrorCode ierr;
1901   int size;
1902 
1903   PetscFunctionBegin;
1904   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1905   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1906   if (size == 1) {
1907     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1908   } else {
1909     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1910   }
1911   PetscFunctionReturn(0);
1912 }
1913 EXTERN_C_END
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1917 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1918 {
1919   Mat        mat;
1920   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1921   PetscErrorCode ierr;
1922 
1923   PetscFunctionBegin;
1924   *newmat       = 0;
1925   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1926   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1927   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1928   a    = (Mat_MPIAIJ*)mat->data;
1929 
1930   mat->factor       = matin->factor;
1931   mat->assembled    = PETSC_TRUE;
1932   mat->insertmode   = NOT_SET_VALUES;
1933   mat->preallocated = PETSC_TRUE;
1934 
1935   a->rstart       = oldmat->rstart;
1936   a->rend         = oldmat->rend;
1937   a->cstart       = oldmat->cstart;
1938   a->cend         = oldmat->cend;
1939   a->size         = oldmat->size;
1940   a->rank         = oldmat->rank;
1941   a->donotstash   = oldmat->donotstash;
1942   a->roworiented  = oldmat->roworiented;
1943   a->rowindices   = 0;
1944   a->rowvalues    = 0;
1945   a->getrowactive = PETSC_FALSE;
1946 
1947   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1948   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1949   if (oldmat->colmap) {
1950 #if defined (PETSC_USE_CTABLE)
1951     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1952 #else
1953     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1954     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1955     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1956 #endif
1957   } else a->colmap = 0;
1958   if (oldmat->garray) {
1959     int len;
1960     len  = oldmat->B->n;
1961     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1962     PetscLogObjectMemory(mat,len*sizeof(int));
1963     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1964   } else a->garray = 0;
1965 
1966   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1967   PetscLogObjectParent(mat,a->lvec);
1968   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1969   PetscLogObjectParent(mat,a->Mvctx);
1970   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1971   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1972   PetscLogObjectParent(mat,a->A);
1973   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1974   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1975   PetscLogObjectParent(mat,a->B);
1976   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1977   *newmat = mat;
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 #include "petscsys.h"
1982 
1983 #undef __FUNCT__
1984 #define __FUNCT__ "MatLoad_MPIAIJ"
1985 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1986 {
1987   Mat            A;
1988   PetscScalar    *vals,*svals;
1989   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1990   MPI_Status     status;
1991   PetscErrorCode ierr;
1992   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1993   int            i,nz,j,rstart,rend,fd;
1994   int            header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1995   int            *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1996   int            cend,cstart,n;
1997 
1998   PetscFunctionBegin;
1999   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2000   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2001   if (!rank) {
2002     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2003     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2004     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2005     if (header[3] < 0) {
2006       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
2007     }
2008   }
2009 
2010   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2011   M = header[1]; N = header[2];
2012   /* determine ownership of all rows */
2013   m = M/size + ((M % size) > rank);
2014   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2015   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2016   rowners[0] = 0;
2017   for (i=2; i<=size; i++) {
2018     rowners[i] += rowners[i-1];
2019   }
2020   rstart = rowners[rank];
2021   rend   = rowners[rank+1];
2022 
2023   /* distribute row lengths to all processors */
2024   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
2025   offlens = ourlens + (rend-rstart);
2026   if (!rank) {
2027     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
2028     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2029     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2030     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2031     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2032     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2033   } else {
2034     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2035   }
2036 
2037   if (!rank) {
2038     /* calculate the number of nonzeros on each processor */
2039     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2040     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2041     for (i=0; i<size; i++) {
2042       for (j=rowners[i]; j< rowners[i+1]; j++) {
2043         procsnz[i] += rowlengths[j];
2044       }
2045     }
2046     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2047 
2048     /* determine max buffer needed and allocate it */
2049     maxnz = 0;
2050     for (i=0; i<size; i++) {
2051       maxnz = PetscMax(maxnz,procsnz[i]);
2052     }
2053     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2054 
2055     /* read in my part of the matrix column indices  */
2056     nz   = procsnz[0];
2057     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2058     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2059 
2060     /* read in everyone else's and ship it off */
2061     for (i=1; i<size; i++) {
2062       nz   = procsnz[i];
2063       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2064       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2065     }
2066     ierr = PetscFree(cols);CHKERRQ(ierr);
2067   } else {
2068     /* determine buffer space needed for message */
2069     nz = 0;
2070     for (i=0; i<m; i++) {
2071       nz += ourlens[i];
2072     }
2073     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2074 
2075     /* receive message of column indices*/
2076     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2077     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2078     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2079   }
2080 
2081   /* determine column ownership if matrix is not square */
2082   if (N != M) {
2083     n      = N/size + ((N % size) > rank);
2084     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2085     cstart = cend - n;
2086   } else {
2087     cstart = rstart;
2088     cend   = rend;
2089     n      = cend - cstart;
2090   }
2091 
2092   /* loop over local rows, determining number of off diagonal entries */
2093   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2094   jj = 0;
2095   for (i=0; i<m; i++) {
2096     for (j=0; j<ourlens[i]; j++) {
2097       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2098       jj++;
2099     }
2100   }
2101 
2102   /* create our matrix */
2103   for (i=0; i<m; i++) {
2104     ourlens[i] -= offlens[i];
2105   }
2106   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2107   ierr = MatSetType(A,type);CHKERRQ(ierr);
2108   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2109 
2110   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2111   for (i=0; i<m; i++) {
2112     ourlens[i] += offlens[i];
2113   }
2114 
2115   if (!rank) {
2116     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2117 
2118     /* read in my part of the matrix numerical values  */
2119     nz   = procsnz[0];
2120     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2121 
2122     /* insert into matrix */
2123     jj      = rstart;
2124     smycols = mycols;
2125     svals   = vals;
2126     for (i=0; i<m; i++) {
2127       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2128       smycols += ourlens[i];
2129       svals   += ourlens[i];
2130       jj++;
2131     }
2132 
2133     /* read in other processors and ship out */
2134     for (i=1; i<size; i++) {
2135       nz   = procsnz[i];
2136       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2137       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2138     }
2139     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2140   } else {
2141     /* receive numeric values */
2142     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2143 
2144     /* receive message of values*/
2145     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2146     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2147     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2148 
2149     /* insert into matrix */
2150     jj      = rstart;
2151     smycols = mycols;
2152     svals   = vals;
2153     for (i=0; i<m; i++) {
2154       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2155       smycols += ourlens[i];
2156       svals   += ourlens[i];
2157       jj++;
2158     }
2159   }
2160   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2161   ierr = PetscFree(vals);CHKERRQ(ierr);
2162   ierr = PetscFree(mycols);CHKERRQ(ierr);
2163   ierr = PetscFree(rowners);CHKERRQ(ierr);
2164 
2165   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2166   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2167   *newmat = A;
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2173 /*
2174     Not great since it makes two copies of the submatrix: first a local SeqAIJ,
2175   and then the end result obtained by concatenating the local matrices.
2176   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2177 */
2178 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2179 {
2180   PetscErrorCode ierr;
2181   PetscMPIInt    rank,size;
2182   int            i,m,n,rstart,row,rend,nz,*cwork,j;
2183   int            *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2184   Mat            *local,M,Mreuse;
2185   PetscScalar    *vwork,*aa;
2186   MPI_Comm       comm = mat->comm;
2187   Mat_SeqAIJ     *aij;
2188 
2189 
2190   PetscFunctionBegin;
2191   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2192   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2193 
2194   if (call ==  MAT_REUSE_MATRIX) {
2195     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2196     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2197     local = &Mreuse;
2198     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2199   } else {
2200     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2201     Mreuse = *local;
2202     ierr   = PetscFree(local);CHKERRQ(ierr);
2203   }
2204 
2205   /*
2206       m - number of local rows
2207       n - number of columns (same on all processors)
2208       rstart - first row in new global matrix generated
2209   */
2210   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2211   if (call == MAT_INITIAL_MATRIX) {
2212     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2213     ii  = aij->i;
2214     jj  = aij->j;
2215 
2216     /*
2217         Determine the number of non-zeros in the diagonal and off-diagonal
2218         portions of the matrix in order to do correct preallocation
2219     */
2220 
2221     /* first get start and end of "diagonal" columns */
2222     if (csize == PETSC_DECIDE) {
2223       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2224       if (mglobal == n) { /* square matrix */
2225 	nlocal = m;
2226       } else {
2227         nlocal = n/size + ((n % size) > rank);
2228       }
2229     } else {
2230       nlocal = csize;
2231     }
2232     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2233     rstart = rend - nlocal;
2234     if (rank == size - 1 && rend != n) {
2235       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2236     }
2237 
2238     /* next, compute all the lengths */
2239     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2240     olens = dlens + m;
2241     for (i=0; i<m; i++) {
2242       jend = ii[i+1] - ii[i];
2243       olen = 0;
2244       dlen = 0;
2245       for (j=0; j<jend; j++) {
2246         if (*jj < rstart || *jj >= rend) olen++;
2247         else dlen++;
2248         jj++;
2249       }
2250       olens[i] = olen;
2251       dlens[i] = dlen;
2252     }
2253     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2254     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2255     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2256     ierr = PetscFree(dlens);CHKERRQ(ierr);
2257   } else {
2258     int ml,nl;
2259 
2260     M = *newmat;
2261     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2262     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2263     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2264     /*
2265          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2266        rather than the slower MatSetValues().
2267     */
2268     M->was_assembled = PETSC_TRUE;
2269     M->assembled     = PETSC_FALSE;
2270   }
2271   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2272   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2273   ii  = aij->i;
2274   jj  = aij->j;
2275   aa  = aij->a;
2276   for (i=0; i<m; i++) {
2277     row   = rstart + i;
2278     nz    = ii[i+1] - ii[i];
2279     cwork = jj;     jj += nz;
2280     vwork = aa;     aa += nz;
2281     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2282   }
2283 
2284   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2285   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2286   *newmat = M;
2287 
2288   /* save submatrix used in processor for next request */
2289   if (call ==  MAT_INITIAL_MATRIX) {
2290     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2291     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2292   }
2293 
2294   PetscFunctionReturn(0);
2295 }
2296 
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2299 /*@C
2300    MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in
2301    AIJ format (the default parallel PETSc format).  For good matrix assembly performance
2302    the user should preallocate the matrix storage by setting the parameters
2303    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2304    performance can be increased by more than a factor of 50.
2305 
2306    Collective on MPI_Comm
2307 
2308    Input Parameters:
2309 +  A - the matrix
2310 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2311            (same value is used for all local rows)
2312 .  d_nnz - array containing the number of nonzeros in the various rows of the
2313            DIAGONAL portion of the local submatrix (possibly different for each row)
2314            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2315            The size of this array is equal to the number of local rows, i.e. 'm'.
2316            You must leave room for the diagonal entry even if it is zero.
2317 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2318            submatrix (same value is used for all local rows).
2319 -  o_nnz - array containing the number of nonzeros in the various rows of the
2320            OFF-DIAGONAL portion of the local submatrix (possibly different for
2321            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2322            structure. The size of this array is equal to the number
2323            of local rows, i.e. 'm'.
2324 
2325    The AIJ format (also called the Yale sparse matrix format or
2326    compressed row storage), is fully compatible with standard Fortran 77
2327    storage.  That is, the stored row and column indices can begin at
2328    either one (as in Fortran) or zero.  See the users manual for details.
2329 
2330    The user MUST specify either the local or global matrix dimensions
2331    (possibly both).
2332 
2333    The parallel matrix is partitioned such that the first m0 rows belong to
2334    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2335    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2336 
2337    The DIAGONAL portion of the local submatrix of a processor can be defined
2338    as the submatrix which is obtained by extracting the part corresponding
2339    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2340    first row that belongs to the processor, and r2 is the last row belonging
2341    to this processor. This is a square mxm matrix. The remaining portion
2342    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2343 
2344    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2345 
2346    By default, this format uses inodes (identical nodes) when possible.
2347    We search for consecutive rows with the same nonzero structure, thereby
2348    reusing matrix information to achieve increased efficiency.
2349 
2350    Options Database Keys:
2351 +  -mat_aij_no_inode  - Do not use inodes
2352 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2353 -  -mat_aij_oneindex - Internally use indexing starting at 1
2354         rather than 0.  Note that when calling MatSetValues(),
2355         the user still MUST index entries starting at 0!
2356 
2357    Example usage:
2358 
2359    Consider the following 8x8 matrix with 34 non-zero values, that is
2360    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2361    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2362    as follows:
2363 
2364 .vb
2365             1  2  0  |  0  3  0  |  0  4
2366     Proc0   0  5  6  |  7  0  0  |  8  0
2367             9  0 10  | 11  0  0  | 12  0
2368     -------------------------------------
2369            13  0 14  | 15 16 17  |  0  0
2370     Proc1   0 18  0  | 19 20 21  |  0  0
2371             0  0  0  | 22 23  0  | 24  0
2372     -------------------------------------
2373     Proc2  25 26 27  |  0  0 28  | 29  0
2374            30  0  0  | 31 32 33  |  0 34
2375 .ve
2376 
2377    This can be represented as a collection of submatrices as:
2378 
2379 .vb
2380       A B C
2381       D E F
2382       G H I
2383 .ve
2384 
2385    Where the submatrices A,B,C are owned by proc0, D,E,F are
2386    owned by proc1, G,H,I are owned by proc2.
2387 
2388    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2389    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2390    The 'M','N' parameters are 8,8, and have the same values on all procs.
2391 
2392    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2393    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2394    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2395    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2396    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2397    matrix, and [DF] as another SeqAIJ matrix.
2398 
2399    When d_nz, o_nz parameters are specified, d_nz storage elements are
2400    allocated for every row of the local diagonal submatrix, and o_nz
2401    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2402    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2403    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2404    In this case, the values of d_nz,o_nz are:
2405 .vb
2406      proc0 : d_nz = 2, o_nz = 2
2407      proc1 : d_nz = 3, o_nz = 2
2408      proc2 : d_nz = 1, o_nz = 4
2409 .ve
2410    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2411    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2412    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2413    34 values.
2414 
2415    When d_nnz, o_nnz parameters are specified, the storage is specified
2416    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2417    In the above case the values for d_nnz,o_nnz are:
2418 .vb
2419      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2420      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2421      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2422 .ve
2423    Here the space allocated is the sum of all the above values, i.e. 34, and
2424    hence the preallocation is perfect.
2425 
2426    Level: intermediate
2427 
2428 .keywords: matrix, aij, compressed row, sparse, parallel
2429 
2430 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2431 @*/
2432 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2433 {
2434   PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]);
2435 
2436   PetscFunctionBegin;
2437   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2438   if (f) {
2439     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2440   }
2441   PetscFunctionReturn(0);
2442 }
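/*
   Editorial sketch (not part of the library): passing the per-row d_nnz/o_nnz values worked
   out in the man page above for the 8x8 example, assuming a run on exactly 3 processes.
   The function name and the zero padding of the proc2 arrays are illustrative assumptions.
*/
#if 0
PetscErrorCode ExamplePreallocate8x8(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  int            m[3]        = {3,3,2};                   /* local rows (and columns) for proc0,proc1,proc2 */
  int            d_nnz[3][3] = {{2,2,2},{3,3,2},{1,1,0}}; /* DIAGONAL nonzeros per local row                */
  int            o_nnz[3][3] = {{2,2,2},{2,1,1},{4,4,0}}; /* OFF-DIAGONAL nonzeros per local row            */

  PetscFunctionBegin;
  /* valid only when the communicator has exactly 3 processes */
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MatCreate(comm,m[rank],m[rank],8,8,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz[rank],0,o_nnz[rank]);CHKERRQ(ierr);
  /* ... insert the 34 values with MatSetValues() and assemble ... */
  PetscFunctionReturn(0);
}
#endif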
2443 
2444 #undef __FUNCT__
2445 #define __FUNCT__ "MatCreateMPIAIJ"
2446 /*@C
2447    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2448    (the default parallel PETSc format).  For good matrix assembly performance
2449    the user should preallocate the matrix storage by setting the parameters
2450    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2451    performance can be increased by more than a factor of 50.
2452 
2453    Collective on MPI_Comm
2454 
2455    Input Parameters:
2456 +  comm - MPI communicator
2457 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2458            This value should be the same as the local size used in creating the
2459            y vector for the matrix-vector product y = Ax.
2460 .  n - This value should be the same as the local size used in creating the
2461        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2462        calculated if N is given) For square matrices n is almost always m.
2463 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2464 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2465 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2466            (same value is used for all local rows)
2467 .  d_nnz - array containing the number of nonzeros in the various rows of the
2468            DIAGONAL portion of the local submatrix (possibly different for each row)
2469            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2470            The size of this array is equal to the number of local rows, i.e. 'm'.
2471            You must leave room for the diagonal entry even if it is zero.
2472 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2473            submatrix (same value is used for all local rows).
2474 -  o_nnz - array containing the number of nonzeros in the various rows of the
2475            OFF-DIAGONAL portion of the local submatrix (possibly different for
2476            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2477            structure. The size of this array is equal to the number
2478            of local rows, i.e. 'm'.
2479 
2480    Output Parameter:
2481 .  A - the matrix
2482 
2483    Notes:
2484    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2485    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2486    storage requirements for this matrix.
2487 
2488    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2489    processor then it must be used on all processors that share the object for
2490    that argument.
2491 
2492    The AIJ format (also called the Yale sparse matrix format or
2493    compressed row storage), is fully compatible with standard Fortran 77
2494    storage.  That is, the stored row and column indices can begin at
2495    either one (as in Fortran) or zero.  See the users manual for details.
2496 
2497    The user MUST specify either the local or global matrix dimensions
2498    (possibly both).
2499 
2500    The parallel matrix is partitioned such that the first m0 rows belong to
2501    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2502    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2503 
2504    The DIAGONAL portion of the local submatrix of a processor can be defined
2505    as the submatrix which is obtained by extracting the part corresponding
2506    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2507    first row that belongs to the processor, and r2 is the last row belonging
2508    to this processor. This is a square mxm matrix. The remaining portion
2509    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2510 
2511    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2512 
2513    When calling this routine with a single process communicator, a matrix of
2514    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2515    type of communicator, use the construction mechanism:
2516      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2517 
2518    By default, this format uses inodes (identical nodes) when possible.
2519    We search for consecutive rows with the same nonzero structure, thereby
2520    reusing matrix information to achieve increased efficiency.
2521 
2522    Options Database Keys:
2523 +  -mat_aij_no_inode  - Do not use inodes
2524 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2525 -  -mat_aij_oneindex - Internally use indexing starting at 1
2526         rather than 0.  Note that when calling MatSetValues(),
2527         the user still MUST index entries starting at 0!
2528 
2529 
2530    Example usage:
2531 
2532    Consider the following 8x8 matrix with 34 non-zero values, that is
2533    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2534    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2535    as follows:
2536 
2537 .vb
2538             1  2  0  |  0  3  0  |  0  4
2539     Proc0   0  5  6  |  7  0  0  |  8  0
2540             9  0 10  | 11  0  0  | 12  0
2541     -------------------------------------
2542            13  0 14  | 15 16 17  |  0  0
2543     Proc1   0 18  0  | 19 20 21  |  0  0
2544             0  0  0  | 22 23  0  | 24  0
2545     -------------------------------------
2546     Proc2  25 26 27  |  0  0 28  | 29  0
2547            30  0  0  | 31 32 33  |  0 34
2548 .ve
2549 
2550    This can be represented as a collection of submatrices as:
2551 
2552 .vb
2553       A B C
2554       D E F
2555       G H I
2556 .ve
2557 
2558    Where the submatrices A,B,C are owned by proc0, D,E,F are
2559    owned by proc1, G,H,I are owned by proc2.
2560 
2561    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2562    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2563    The 'M','N' parameters are 8,8, and have the same values on all procs.
2564 
2565    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2566    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2567    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2568    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2569    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2570    matrix, and [DF] as another SeqAIJ matrix.
2571 
2572    When d_nz, o_nz parameters are specified, d_nz storage elements are
2573    allocated for every row of the local diagonal submatrix, and o_nz
2574    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2575    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2576    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2577    In this case, the values of d_nz,o_nz are:
2578 .vb
2579      proc0 : d_nz = 2, o_nz = 2
2580      proc1 : d_nz = 3, o_nz = 2
2581      proc2 : d_nz = 1, o_nz = 4
2582 .ve
2583    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2584    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2585    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2586    34 values.
2587 
2588    When d_nnz, o_nnz parameters are specified, the storage is specified
2589    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2590    In the above case the values for d_nnz,o_nnz are:
2591 .vb
2592      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2593      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2594      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2595 .ve
2596    Here the space allocated is the sum of all the above values, i.e. 34, and
2597    hence the preallocation is perfect.
2598 
2599    Level: intermediate
2600 
2601 .keywords: matrix, aij, compressed row, sparse, parallel
2602 
2603 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2604 @*/
2605 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2606 {
2607   PetscErrorCode ierr;
2608   int size;
2609 
2610   PetscFunctionBegin;
2611   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2612   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2613   if (size > 1) {
2614     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2615     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2616   } else {
2617     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2618     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2619   }
2620   PetscFunctionReturn(0);
2621 }
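/*
   Editorial sketch (not part of the library): a minimal assembly loop built on the
   convenience routine above.  The constant preallocation values and the tridiagonal
   stencil are illustrative assumptions.
*/
#if 0
PetscErrorCode ExampleAssembleTridiagonal(MPI_Comm comm,int N,Mat *A)
{
  PetscErrorCode ierr;
  int            i,rstart,rend,ncols,cols[3];
  PetscScalar    vals[3];

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,N,N,3,PETSC_NULL,1,PETSC_NULL,A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    /* simple 1D Laplacian stencil: -1 2 -1 on each locally owned row */
    ncols = 0;
    if (i > 0)   {cols[ncols] = i-1; vals[ncols++] = -1.0;}
    cols[ncols] = i; vals[ncols++] = 2.0;
    if (i < N-1) {cols[ncols] = i+1; vals[ncols++] = -1.0;}
    ierr = MatSetValues(*A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif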
2622 
2623 #undef __FUNCT__
2624 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2625 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2626 {
2627   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2628   PetscFunctionBegin;
2629   *Ad     = a->A;
2630   *Ao     = a->B;
2631   *colmap = a->garray;
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 #undef __FUNCT__
2636 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2637 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2638 {
2639   PetscErrorCode ierr;
2640   int        i;
2641   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2642 
2643   PetscFunctionBegin;
2644   if (coloring->ctype == IS_COLORING_LOCAL) {
2645     ISColoringValue *allcolors,*colors;
2646     ISColoring      ocoloring;
2647 
2648     /* set coloring for diagonal portion */
2649     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2650 
2651     /* set coloring for off-diagonal portion */
2652     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2653     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2654     for (i=0; i<a->B->n; i++) {
2655       colors[i] = allcolors[a->garray[i]];
2656     }
2657     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2658     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2659     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2660     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2661   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2662     ISColoringValue *colors;
2663     int             *larray;
2664     ISColoring      ocoloring;
2665 
2666     /* set coloring for diagonal portion */
2667     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2668     for (i=0; i<a->A->n; i++) {
2669       larray[i] = i + a->cstart;
2670     }
2671     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2672     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2673     for (i=0; i<a->A->n; i++) {
2674       colors[i] = coloring->colors[larray[i]];
2675     }
2676     ierr = PetscFree(larray);CHKERRQ(ierr);
2677     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2678     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2679     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2680 
2681     /* set coloring for off-diagonal portion */
2682     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2683     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2684     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2685     for (i=0; i<a->B->n; i++) {
2686       colors[i] = coloring->colors[larray[i]];
2687     }
2688     ierr = PetscFree(larray);CHKERRQ(ierr);
2689     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2690     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2691     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2692   } else {
2693     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2694   }
2695 
2696   PetscFunctionReturn(0);
2697 }
2698 
2699 #undef __FUNCT__
2700 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2701 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2702 {
2703   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2704   PetscErrorCode ierr;
2705 
2706   PetscFunctionBegin;
2707   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2708   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2709   PetscFunctionReturn(0);
2710 }
2711 
2712 #undef __FUNCT__
2713 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2714 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2715 {
2716   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2717   PetscErrorCode ierr;
2718 
2719   PetscFunctionBegin;
2720   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2721   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2722   PetscFunctionReturn(0);
2723 }
2724 
2725 #undef __FUNCT__
2726 #define __FUNCT__ "MatMerge"
2727 /*@C
2728       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2729                  matrices from each processor
2730 
2731     Collective on MPI_Comm
2732 
2733    Input Parameters:
2734 +    comm - the communicator the parallel matrix will live on
2735 .    inmat - the input sequential matrices
2736 .    n - number of local columns (or PETSC_DECIDE)
2737 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2738 
2739    Output Parameter:
2740 .    outmat - the parallel matrix generated
2741 
2742     Level: advanced
2743 
2744    Notes: The number of columns of the matrix on EACH processor MUST be the same.
2745 
2746 @*/
2747 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2748 {
2749   PetscErrorCode ierr;
2750   int               m,N,i,rstart,nnz,I,*dnz,*onz;
2751   const int         *indx;
2752   const PetscScalar *values;
2753   PetscMap          columnmap,rowmap;
2754 
2755   PetscFunctionBegin;
2756 
2757   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2758   /*
2759   PetscMPIInt       rank;
2760   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2761   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2762   */
2763   if (scall == MAT_INITIAL_MATRIX){
2764     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2765     if (n == PETSC_DECIDE){
2766       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2767       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2768       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2769       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2770       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2771     }
2772 
2773     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2774     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2775     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2776     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2777     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2778 
2779     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2780     for (i=0;i<m;i++) {
2781       ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2782       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2783       ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2784     }
2785     /* This routine will ONLY return an MPIAIJ type matrix */
2786     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2787     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2788     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2789     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2790 
2791   } else if (scall == MAT_REUSE_MATRIX){
2792     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2793   } else {
2794     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2795   }
2796 
2797   for (i=0;i<m;i++) {
2798     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2799     I    = i + rstart;
2800     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2801     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2802   }
2803   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2804   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2805   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2806 
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 #undef __FUNCT__
2811 #define __FUNCT__ "MatFileSplit"
2812 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2813 {
2814   PetscErrorCode    ierr;
2815   PetscMPIInt       rank;
2816   int               m,N,i,rstart,nnz;
2817   size_t            len;
2818   const int         *indx;
2819   PetscViewer       out;
2820   char              *name;
2821   Mat               B;
2822   const PetscScalar *values;
2823 
2824   PetscFunctionBegin;
2825 
2826   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2827   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2828   /* Should this be the type of the diagonal block of A? */
2829   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2830   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2831   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2832   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2833   for (i=0;i<m;i++) {
2834     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2835     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2836     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2837   }
2838   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2839   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2840 
2841   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2842   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2843   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for ".<rank>" even with many processes */
2844   sprintf(name,"%s.%d",outfile,rank);
2845   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2846   ierr = PetscFree(name);CHKERRQ(ierr);
2847   ierr = MatView(B,out);CHKERRQ(ierr);
2848   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2849   ierr = MatDestroy(B);CHKERRQ(ierr);
2850   PetscFunctionReturn(0);
2851 }
2852 
2853 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2854 #undef __FUNCT__
2855 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2856 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2857 {
2858   PetscErrorCode       ierr;
2859   Mat_Merge_SeqsToMPI  *merge;
2860   PetscObjectContainer container;
2861 
2862   PetscFunctionBegin;
2863   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2864   if (container) {
2865     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2866     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2867     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2868     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2869     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2870     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2871     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2872     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2873     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2874     ierr = MatDestroy(merge->C_seq);CHKERRQ(ierr);
2875 
2876     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2877     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2878     ierr = PetscFree(merge);CHKERRQ(ierr);
2879   }
2880 
2881   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 #include "src/mat/utils/freespace.h"
2886 #include "petscbt.h"
2887 #undef __FUNCT__
2888 #define __FUNCT__ "MatMerge_SeqsToMPI"
2889 /*@C
2890       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2891                  matrices from each processor
2892 
2893     Collective on MPI_Comm
2894 
2895    Input Parameters:
2896 +    comm - the communicator the parallel matrix will live on
2897 .    seqmat - the input sequential matrix on each processor
2898 .    m - number of local rows (or PETSC_DECIDE)
2899 .    n - number of local columns (or PETSC_DECIDE)
2900 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2901 
2902    Output Parameter:
2903 .    mpimat - the parallel matrix generated
2904 
2905     Level: advanced
2906 
2907    Notes:
2908      The dimensions of the sequential matrix on each processor MUST be the same.
2909      The input seqmat is kept in the container "MatMergeSeqsToMPI" composed with mpimat, and will be
2910      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
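     Example usage (a minimal sketch, assuming each process holds an assembled
     SeqAIJ matrix "seqlocal" of identical dimensions):
.vb
      Mat seqlocal,mpimat;
      /* ... assemble seqlocal on PETSC_COMM_SELF ... */
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqlocal,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      /* seqlocal now lives in mpimat's "MatMergeSeqsToMPI" container and is destroyed with mpimat */
      ierr = MatDestroy(mpimat);CHKERRQ(ierr);
.ve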
2911 @*/
2912 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2913 {
2914   PetscErrorCode    ierr;
2915   MPI_Comm          comm=mpimat->comm;
2916   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)seqmat->data;
2917   PetscMPIInt       size,rank;
2918   int               N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2919   int               *len_s,proc,m;
2920   int               taga,**buf_ri,**buf_rj;
2921   int               k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2922   int               nrows,**buf_ri_k,**nextrow,**nextai;
2923   MPI_Request       *s_waits,*r_waits;
2924   MPI_Status        *status;
2925   MatScalar         *aa=a->a,**abuf_r,*ba_i;
2926   Mat_Merge_SeqsToMPI  *merge;
2927   PetscObjectContainer container;
2928 
2929   PetscFunctionBegin;
2930   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2931   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2932 
2933   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2934   if (container) {
2935     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2936   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing its MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
2937   bi     = merge->bi;
2938   bj     = merge->bj;
2939   buf_ri = merge->buf_ri;
2940   buf_rj = merge->buf_rj;
2941 
2942   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2943   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2944   len_s  = merge->len_s;
2945 
2946   /* send and recv matrix values */
2947   /*-----------------------------*/
2948   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2949   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2950 
2951   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2952   for (proc=0,k=0; proc<size; proc++){
2953     if (!len_s[proc]) continue;
2954     i = owners[proc];
2955     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2956     k++;
2957   }
2958 
2959   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2960   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2961   ierr = PetscFree(status);CHKERRQ(ierr);
2962 
2963   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2964   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2965 
2966   /* insert mat values of mpimat */
2967   /*----------------------------*/
2968   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2969   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(int**),&buf_ri_k);CHKERRQ(ierr);
2970   nextrow = buf_ri_k + merge->nrecv;
2971   nextai  = nextrow + merge->nrecv;
2972 
2973   for (k=0; k<merge->nrecv; k++){
2974     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2975     nrows = *(buf_ri_k[k]);
2976     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2977     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
2978   }
2979 
2980   /* set values of ba */
2981   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2982   for (i=0; i<m; i++) {
2983     arow = owners[rank] + i;
2984     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2985     bnzi = bi[i+1] - bi[i];
2986     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2987 
2988     /* add local non-zero vals of this proc's seqmat into ba */
2989     anzi = ai[arow+1] - ai[arow];
2990     aj   = a->j + ai[arow];
2991     aa   = a->a + ai[arow];
2992     nextaj = 0;
2993     for (j=0; nextaj<anzi; j++){
2994       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2995         ba_i[j] += aa[nextaj++];
2996       }
2997     }
2998 
2999     /* add received vals into ba */
3000     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3001       /* i-th row */
3002       if (i == *nextrow[k]) {
3003         anzi = *(nextai[k]+1) - *nextai[k];
3004         aj   = buf_rj[k] + *(nextai[k]);
3005         aa   = abuf_r[k] + *(nextai[k]);
3006         nextaj = 0;
3007         for (j=0; nextaj<anzi; j++){
3008           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3009             ba_i[j] += aa[nextaj++];
3010           }
3011         }
3012         nextrow[k]++; nextai[k]++;
3013       }
3014     }
3015     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3016   }
3017   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3018   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3019 
3020   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3021   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3022   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3023 
3024   PetscFunctionReturn(0);
3025 }
3026 
3027 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3028 {
3029   PetscErrorCode    ierr;
3030   Mat               B_seq,B_mpi;
3031   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)seqmat->data;
3032   PetscMPIInt       size,rank;
3033   int               M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3034   int               len,*len_s,proc;
3035   int               tagi,tagj,*len_si,*len_ri,**buf_ri,**buf_rj;
3036   int               k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3037   int               nrows,*buf_s,*buf_si,*buf_si_i,**buf_ri_k,**nextrow,**nextai;
3038   MPI_Request       *si_waits,*sj_waits,*ri_waits,*rj_waits;
3039   MPI_Status        *status;
3040   MatScalar         *ba;
3041   FreeSpaceList     free_space=PETSC_NULL,current_space=PETSC_NULL;
3042   PetscBT           lnkbt;
3043   Mat_Merge_SeqsToMPI  *merge;
3044   PetscObjectContainer container;
3045 
3046   PetscFunctionBegin;
3047   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3048   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3049 
3050   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3051   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3052 
3053   /* determine the number of messages to send, their lengths */
3054   /*---------------------------------------------------------*/
3055   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3056   if (m == PETSC_DECIDE) {
3057     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3058   } else {
3059     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3060   }
3061   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3062   ierr = PetscMalloc(size*sizeof(int),&len_si);CHKERRQ(ierr);
3063   ierr = PetscMalloc(size*sizeof(int),&merge->len_s);CHKERRQ(ierr);
3064 
3065   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3066   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3067   len_s  = merge->len_s;
3068 
3069   len = 0;  /* length of buf_si[] */
3070   merge->nsend = 0;
3071   for (proc=0; proc<size; proc++){
3072     len_si[proc] = 0;
3073     if (proc == rank){
3074       len_si[proc] = len_s[proc] = 0;
3075     } else {
3076       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3077       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3078     }
3079     if (len_s[proc]) {
3080       merge->nsend++;
3081       nrows = 0;
3082       for (i=owners[proc]; i<owners[proc+1]; i++){
3083         if (ai[i+1] > ai[i]) nrows++;
3084       }
3085       len_si[proc] = 2*(nrows+1);
3086       len += len_si[proc];
3087     }
3088   }
3089 
3090   /* determine the number and length of messages to receive for ij-structure */
3091   /*-------------------------------------------------------------------------*/
3092   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3093   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3094 
3095   /* post the Irecv of j-structure */
3096   /*-------------------------------*/
3097   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3098   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3099 
3100   /* post the Isend of j-structure */
3101   /*--------------------------------*/
3102   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3103   sj_waits = si_waits + merge->nsend;
3104 
3105   for (proc=0, k=0; proc<size; proc++){
3106     if (!len_s[proc]) continue;
3107     i = owners[proc];
3108     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPI_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3109     k++;
3110   }
3111 
3112   /* receives and sends of j-structure are complete */
3113   /*------------------------------------------------*/
3114   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3115   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3116 
3117   /* send and recv i-structure */
3118   /*---------------------------*/
3119   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3120   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3121 
3122   ierr = PetscMalloc((len+1)*sizeof(int),&buf_s);CHKERRQ(ierr);
3123   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3124   for (proc=0,k=0; proc<size; proc++){
3125     if (!len_s[proc]) continue;
3126     /* form outgoing message for i-structure:
3127          buf_si[0]:                 nrows to be sent
3128                [1:nrows]:           row index (local to the receiving process)
3129                [nrows+1:2*nrows+1]: i-structure index
3130     */
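    /* hypothetical illustration: if only the receiver's local rows 2 and 5 are sent,
       carrying 3 and 4 nonzeros respectively, then buf_si = {2, 2,5, 0,3,7} */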
3131     /*-------------------------------------------*/
3132     nrows = len_si[proc]/2 - 1;
3133     buf_si_i    = buf_si + nrows+1;
3134     buf_si[0]   = nrows;
3135     buf_si_i[0] = 0;
3136     nrows = 0;
3137     for (i=owners[proc]; i<owners[proc+1]; i++){
3138       anzi = ai[i+1] - ai[i];
3139       if (anzi) {
3140         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3141         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3142         nrows++;
3143       }
3144     }
3145     ierr = MPI_Isend(buf_si,len_si[proc],MPI_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3146     k++;
3147     buf_si += len_si[proc];
3148   }
3149 
3150   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3151   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3152 
3153   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3154   for (i=0; i<merge->nrecv; i++){
3155     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%d, len_rj=%d from [%d]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3156   }
3157 
3158   ierr = PetscFree(len_si);CHKERRQ(ierr);
3159   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3160   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3161   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3162   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3163   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3164 
3165   /* create seq matrix B_seq in each processor */
3166   /*-------------------------------------------*/
3167   /* allocate bi array and free space for accumulating nonzero column info */
3168   ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr);
3169   bi[0] = 0;
3170 
3171   /* create and initialize a linked list */
3172   nlnk = N+1;
3173   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3174 
3175   /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */
3176   len = 0;
3177   for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i];
3178   ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr);
3179   current_space = free_space;
3180 
3181   /* determine symbolic info for each row of B_seq */
3182   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(int**),&buf_ri_k);CHKERRQ(ierr);
3183   nextrow = buf_ri_k + merge->nrecv;
3184   nextai  = nextrow + merge->nrecv;
3185   for (k=0; k<merge->nrecv; k++){
3186     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3187     nrows = *buf_ri_k[k];
3188     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3189     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
3190   }
3191 
3192   for (i=0;i<m;i++) {
3193     bnzi   = 0;
3194     /* add local non-zero cols of this proc's seqmat into lnk */
3195     arow   = owners[rank] + i;
3196     anzi   = ai[arow+1] - ai[arow];
3197     aj     = a->j + ai[arow];
3198     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3199     bnzi += nlnk;
3200     /* add received col data into lnk */
3201     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3202       if (i == *nextrow[k]) { /* i-th row */
3203         anzi = *(nextai[k]+1) - *nextai[k];
3204         aj   = buf_rj[k] + *nextai[k];
3205         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3206         bnzi += nlnk;
3207         nextrow[k]++; nextai[k]++;
3208       }
3209     }
3210 
3211     /* if free space is not available, make more free space */
3212     if (current_space->local_remaining<bnzi) {
3213       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3214       nspacedouble++;
3215     }
3216     /* copy data into free space, then initialize lnk */
3217     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3218     current_space->array           += bnzi;
3219     current_space->local_used      += bnzi;
3220     current_space->local_remaining -= bnzi;
3221 
3222     bi[i+1] = bi[i] + bnzi;
3223   }
3224   ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr);
3225   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3226 
3227   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3228 
3229   /* allocate space for ba */
3230   ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3231   ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3232 
3233   /* put together B_seq */
3234   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr);
3235   ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */
3236 
3237   /* create symbolic parallel matrix B_mpi by concatenating B_seq */
3238   /*--------------------------------------------------------------*/
3239   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
3240   ierr = MatMerge(comm,B_seq,n,MAT_INITIAL_MATRIX,&B_mpi);CHKERRQ(ierr);
3241   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3242   merge->bi            = bi;
3243   merge->bj            = bj;
3244   merge->buf_ri        = buf_ri;
3245   merge->buf_rj        = buf_rj;
3246   merge->C_seq         = seqmat;
3247 
3248   /* attach the supporting struct to B_mpi for reuse */
3249   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3250   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3251   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3252   *mpimat = B_mpi;
3253 
3254   ierr = PetscFree(status);CHKERRQ(ierr);
3255   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3256 
3257   PetscFunctionReturn(0);
3258 }
3259 
3260 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3261 {
3262   PetscErrorCode   ierr;
3263 
3264   PetscFunctionBegin;
3265   if (scall == MAT_INITIAL_MATRIX){
3266     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3267   }
3268   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3269 
3270   PetscFunctionReturn(0);
3271 }
3272 
3273 #undef __FUNCT__
3274 #define __FUNCT__ "MatGetLocalMat"
3275 /*@C
3276      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3277 
3278     Collective on Mat
3279 
3280    Input Parameters:
3281 +    A - the matrix
3282 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3283 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3284 
3285    Output Parameter:
3286 .    A_loc - the local sequential matrix generated
3287 
3288     Level: developer
3289 
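   Example usage (a minimal sketch on an assembled MPIAIJ matrix A; passing PETSC_NULL
   for row and col lets the routine build the default index sets):
.vb
      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      /* ... use A_loc ... */
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve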
3290 @*/
3291 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3292 {
3293   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3294   PetscErrorCode    ierr;
3295   int               *idx,i,start,end,ncols,nzA,nzB,*cmap,imark;
3296   IS                isrowa,iscola;
3297   Mat               *aloc;
3298 
3299   PetscFunctionBegin;
3300   if (!row){
3301     start = a->rstart; end = a->rend;
3302     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3303   } else {
3304     isrowa = *row;
3305   }
3306   if (!col){
3307     start = a->cstart;
3308     cmap  = a->garray;
3309     nzA   = a->A->n;
3310     nzB   = a->B->n;
3311     ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
3312     ncols = 0;
3313     for (i=0; i<nzB; i++) {
3314       if (cmap[i] < start) idx[ncols++] = cmap[i];
3315       else break;
3316     }
3317     imark = i;
3318     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3319     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3320     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3321     ierr = PetscFree(idx);CHKERRQ(ierr);
3322   } else {
3323     iscola = *col;
3324   }
3325   if (scall != MAT_INITIAL_MATRIX){
3326     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3327     aloc[0] = *A_loc;
3328   }
3329   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3330   *A_loc = aloc[0];
3331   ierr = PetscFree(aloc);CHKERRQ(ierr);
3332   if (!row){
3333     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3334   }
3335   if (!col){
3336     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3337   }
3338   PetscFunctionReturn(0);
3339 }
3340 
3341 #undef __FUNCT__
3342 #define __FUNCT__ "MatGetBrowsOfAcols"
3343 /*@C
3344      MatGetBrowsOfAcols - Creates a SeqAIJ matrix containing the rows of B that correspond to the nonzero columns of the local part of A
3345 
3346     Collective on Mat
3347 
3348    Input Parameters:
3349 +    A,B - the matrices
3350 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3351 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3352 
3353    Output Parameter:
3354 +    rowb, colb - index sets of rows and columns of B to extract
3355 .    brstart - the row index of B_seq at which B's own local rows (the next B->m rows) begin
3356 -    B_seq - the sequential matrix generated
3357 
3358     Level: developer
3359 
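   Example usage (a minimal sketch of the initial call; the created rowb, colb and B_seq
   belong to the caller and may be passed back in a later MAT_REUSE_MATRIX call):
.vb
      IS       rowb,colb;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve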
3360 @*/
3361 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3362 {
3363   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3364   PetscErrorCode    ierr;
3365   int               *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3366   IS                isrowb,iscolb;
3367   Mat               *bseq;
3368 
3369   PetscFunctionBegin;
3370   if (a->cstart != b->rstart || a->cend != b->rend){
3371     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3372   }
3373 
3374   if (scall == MAT_INITIAL_MATRIX){
3375     start = a->cstart;
3376     cmap  = a->garray;
3377     nzA   = a->A->n;
3378     nzB   = a->B->n;
3379     ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
3380     ncols = 0;
3381     for (i=0; i<nzB; i++) {
3382       if (cmap[i] < start) idx[ncols++] = cmap[i];
3383       else break;
3384     }
3385     imark = i;
3386     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3387     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3388     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3389     ierr = PetscFree(idx);CHKERRQ(ierr);
3390     *brstart = imark;
3391     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3392   } else {
3393     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3394     isrowb = *rowb; iscolb = *colb;
3395     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3396     bseq[0] = *B_seq;
3397   }
3398   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3399   *B_seq = bseq[0];
3400   ierr = PetscFree(bseq);CHKERRQ(ierr);
3401   if (!rowb){
3402     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3403   } else {
3404     *rowb = isrowb;
3405   }
3406   if (!colb){
3407     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3408   } else {
3409     *colb = iscolb;
3410   }
3411 
3412   PetscFunctionReturn(0);
3413 }
3414