xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 101f549e073eee4035907c90cbd999f93388f840)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9 a slightly higher hash table cost; without it it is not scalable (each processor
10 has an order N integer array but is fast to acess.
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                    ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
58       if (nonew == 1) goto a_noinsert; \
59       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
60       if (nrow >= rmax) { \
61         /* there is no extra room in row, therefore enlarge */ \
62         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
63         PetscScalar *new_a; \
64  \
65         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
66  \
67         /* malloc new storage space */ \
68         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
69         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
70         new_j   = (PetscInt*)(new_a + new_nz); \
71         new_i   = new_j + new_nz; \
72  \
73         /* copy over old data into new slots */ \
74         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
75         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
76         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
77         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
78         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
79                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
81         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
82                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
83         /* free up old matrix storage */ \
84  \
85         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
86         if (!a->singlemalloc) { \
87            ierr = PetscFree(a->i);CHKERRQ(ierr); \
88            ierr = PetscFree(a->j);CHKERRQ(ierr); \
89         } \
90         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
91         a->singlemalloc = PETSC_TRUE; \
92  \
93         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
94         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
95         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
96         a->maxnz += CHUNKSIZE; \
97         a->reallocs++; \
98       } \
99       N = nrow++ - 1; a->nz++; \
100       /* shift up all the later entries in this row */ \
101       for (ii=N; ii>=_i; ii--) { \
102         rp[ii+1] = rp[ii]; \
103         ap[ii+1] = ap[ii]; \
104       } \
105       rp[_i] = col1;  \
106       ap[_i] = value;  \
107       a_noinsert: ; \
108       ailen[row] = nrow; \
109 }
110 
111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112 { \
113  \
114     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115     rmax = bimax[row]; nrow = bilen[row];  \
116     col1 = col - shift; \
117      \
118     low = 0; high = nrow; \
119     while (high-low > 5) { \
120       t = (low+high)/2; \
121       if (rp[t] > col) high = t; \
122       else             low  = t; \
123     } \
124        for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > col1) break; \
126         if (rp[_i] == col1) { \
127           if (addv == ADD_VALUES) ap[_i] += value;   \
128           else                    ap[_i] = value; \
129           goto b_noinsert; \
130         } \
131       }  \
132       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
133       if (nonew == 1) goto b_noinsert; \
134       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
135       if (nrow >= rmax) { \
136         /* there is no extra room in row, therefore enlarge */ \
137         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
138         PetscScalar *new_a; \
139  \
140         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
141  \
142         /* malloc new storage space */ \
143         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
144         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
145         new_j   = (PetscInt*)(new_a + new_nz); \
146         new_i   = new_j + new_nz; \
147  \
148         /* copy over old data into new slots */ \
149         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
150         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
151         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
152         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
153         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
154                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
155         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
156         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
157                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
158         /* free up old matrix storage */ \
159  \
160         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
161         if (!b->singlemalloc) { \
162           ierr = PetscFree(b->i);CHKERRQ(ierr); \
163           ierr = PetscFree(b->j);CHKERRQ(ierr); \
164         } \
165         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
166         b->singlemalloc = PETSC_TRUE; \
167  \
168         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
169         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
170         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
171         b->maxnz += CHUNKSIZE; \
172         b->reallocs++; \
173       } \
174       N = nrow++ - 1; b->nz++; \
175       /* shift up all the later entries in this row */ \
176       for (ii=N; ii>=_i; ii--) { \
177         rp[ii+1] = rp[ii]; \
178         ap[ii+1] = ap[ii]; \
179       } \
180       rp[_i] = col1;  \
181       ap[_i] = value;  \
182       b_noinsert: ; \
183       bilen[row] = nrow; \
184 }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIAIJ"
188 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
191   PetscScalar    value;
192   PetscErrorCode ierr;
193   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
194   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
195   PetscTruth     roworiented = aij->roworiented;
196 
197   /* Some Variables required in the macro */
198   Mat            A = aij->A;
199   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
200   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
201   PetscScalar    *aa = a->a;
202   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
203   Mat            B = aij->B;
204   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
205   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
206   PetscScalar    *ba = b->a;
207 
208   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
209   PetscInt       nonew = a->nonew,shift=0;
210   PetscScalar    *ap;
211 
212   PetscFunctionBegin;
213   for (i=0; i<m; i++) {
214     if (im[i] < 0) continue;
215 #if defined(PETSC_USE_DEBUG)
216     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
217 #endif
218     if (im[i] >= rstart && im[i] < rend) {
219       row = im[i] - rstart;
220       for (j=0; j<n; j++) {
221         if (in[j] >= cstart && in[j] < cend){
222           col = in[j] - cstart;
223           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
224           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
225           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
226           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
227         } else if (in[j] < 0) continue;
228 #if defined(PETSC_USE_DEBUG)
229         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
230 #endif
231         else {
232           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
233           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
234           if (mat->was_assembled) {
235             if (!aij->colmap) {
236               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined (PETSC_USE_CTABLE)
239             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
240 	    col--;
241 #else
242             col = aij->colmap[in[j]] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
245               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
246               col =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
248               B = aij->B;
249               b = (Mat_SeqAIJ*)B->data;
250               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
251               ba = b->a;
252             }
253           } else col = in[j];
254           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (!aij->donotstash) {
260         if (roworiented) {
261           if (ignorezeroentries && v[i*n] == 0.0) continue;
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
263         } else {
264           if (ignorezeroentries && v[i] == 0.0) continue;
265           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
266         }
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatGetValues_MPIAIJ"
275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
276 {
277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
278   PetscErrorCode ierr;
279   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
280   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
281 
282   PetscFunctionBegin;
283   for (i=0; i<m; i++) {
284     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
285     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
286     if (idxm[i] >= rstart && idxm[i] < rend) {
287       row = idxm[i] - rstart;
288       for (j=0; j<n; j++) {
289         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
290         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
291         if (idxn[j] >= cstart && idxn[j] < cend){
292           col = idxn[j] - cstart;
293           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
294         } else {
295           if (!aij->colmap) {
296             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
297           }
298 #if defined (PETSC_USE_CTABLE)
299           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
300           col --;
301 #else
302           col = aij->colmap[idxn[j]] - 1;
303 #endif
304           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
305           else {
306             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
307           }
308         }
309       }
310     } else {
311       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
312     }
313   }
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
320 {
321   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
322   PetscErrorCode ierr;
323   PetscInt       nstash,reallocs;
324   InsertMode     addv;
325 
326   PetscFunctionBegin;
327   if (aij->donotstash) {
328     PetscFunctionReturn(0);
329   }
330 
331   /* make sure all processors are either in INSERTMODE or ADDMODE */
332   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
333   if (addv == (ADD_VALUES|INSERT_VALUES)) {
334     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
335   }
336   mat->insertmode = addv; /* in case this processor had no cache */
337 
338   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
339   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
340   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNCT__
345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
347 {
348   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
349   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
350   PetscErrorCode ierr;
351   PetscMPIInt    n;
352   PetscInt       i,j,rstart,ncols,flg;
353   PetscInt       *row,*col,other_disassembled;
354   PetscScalar    *val;
355   InsertMode     addv = mat->insertmode;
356 
357   PetscFunctionBegin;
358   if (!aij->donotstash) {
359     while (1) {
360       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
361       if (!flg) break;
362 
363       for (i=0; i<n;) {
364         /* Now identify the consecutive vals belonging to the same row */
365         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
366         if (j < n) ncols = j-i;
367         else       ncols = n-i;
368         /* Now assemble all these values with a single function call */
369         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
370         i = j;
371       }
372     }
373     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
374   }
375   a->compressedrow.use     = PETSC_FALSE;
376   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
377   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
378 
379   /* determine if any processor has disassembled, if so we must
380      also disassemble ourselfs, in order that we may reassemble. */
381   /*
382      if nonzero structure of submatrix B cannot change then we know that
383      no processor disassembled thus we can skip this stuff
384   */
385   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
386     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
387     if (mat->was_assembled && !other_disassembled) {
388       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
389       /* reaccess the b because aij->B was changed */
390       b    = (Mat_SeqAIJ *)aij->B->data;
391     }
392   }
393 
394   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
395     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
396   }
397 
398   b->inode.use             = PETSC_FALSE;
399   b->compressedrow.use     = PETSC_TRUE;
400   if (aij->B->bs !=1){
401     /* printf("aij->B->bs: %d, mat: bs=%d, dim=%d, %d, %d, %d\n",aij->B->bs,mat->bs,mat->m,mat->n,mat->M,mat->N);*/
402     aij->B->bs = 1; /* why it could !=1 ? */
403   }
404   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
405   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
406 
407   if (aij->rowvalues) {
408     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
409     aij->rowvalues = 0;
410   }
411 
412   /* used by MatAXPY() */
413   a->xtoy = 0; b->xtoy = 0;
414   a->XtoY = 0; b->XtoY = 0;
415 
416   PetscFunctionReturn(0);
417 }
418 
419 #undef __FUNCT__
420 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
421 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
422 {
423   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
424   PetscErrorCode ierr;
425 
426   PetscFunctionBegin;
427   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
428   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "MatZeroRows_MPIAIJ"
434 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
435 {
436   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
437   PetscErrorCode ierr;
438   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
439   PetscInt       i,N,*rows,*owners = l->rowners;
440   PetscInt       *nprocs,j,idx,nsends,row;
441   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
442   PetscInt       *rvalues,count,base,slen,*source;
443   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
444   MPI_Comm       comm = A->comm;
445   MPI_Request    *send_waits,*recv_waits;
446   MPI_Status     recv_status,*send_status;
447   IS             istmp;
448   PetscTruth     found;
449 
450   PetscFunctionBegin;
451   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
452   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
453 
454   /*  first count number of contributors to each processor */
455   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
456   ierr   = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
457   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
458   for (i=0; i<N; i++) {
459     idx = rows[i];
460     found = PETSC_FALSE;
461     for (j=0; j<size; j++) {
462       if (idx >= owners[j] && idx < owners[j+1]) {
463         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
464       }
465     }
466     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
467   }
468   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
469 
470   /* inform other processors of number of messages and max length*/
471   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
472 
473   /* post receives:   */
474   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
475   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
476   for (i=0; i<nrecvs; i++) {
477     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
478   }
479 
480   /* do sends:
481       1) starts[i] gives the starting index in svalues for stuff going to
482          the ith processor
483   */
484   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
485   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
486   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
487   starts[0] = 0;
488   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
489   for (i=0; i<N; i++) {
490     svalues[starts[owner[i]]++] = rows[i];
491   }
492   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
493 
494   starts[0] = 0;
495   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
496   count = 0;
497   for (i=0; i<size; i++) {
498     if (nprocs[2*i+1]) {
499       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
500     }
501   }
502   ierr = PetscFree(starts);CHKERRQ(ierr);
503 
504   base = owners[rank];
505 
506   /*  wait on receives */
507   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
508   source = lens + nrecvs;
509   count  = nrecvs; slen = 0;
510   while (count) {
511     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
512     /* unpack receives into our local space */
513     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
514     source[imdex]  = recv_status.MPI_SOURCE;
515     lens[imdex]    = n;
516     slen          += n;
517     count--;
518   }
519   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
520 
521   /* move the data into the send scatter */
522   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
523   count = 0;
524   for (i=0; i<nrecvs; i++) {
525     values = rvalues + i*nmax;
526     for (j=0; j<lens[i]; j++) {
527       lrows[count++] = values[j] - base;
528     }
529   }
530   ierr = PetscFree(rvalues);CHKERRQ(ierr);
531   ierr = PetscFree(lens);CHKERRQ(ierr);
532   ierr = PetscFree(owner);CHKERRQ(ierr);
533   ierr = PetscFree(nprocs);CHKERRQ(ierr);
534 
535   /* actually zap the local rows */
536   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
537   PetscLogObjectParent(A,istmp);
538 
539   /*
540         Zero the required rows. If the "diagonal block" of the matrix
541      is square and the user wishes to set the diagonal we use seperate
542      code so that MatSetValues() is not called for each diagonal allocating
543      new memory, thus calling lots of mallocs and slowing things down.
544 
545        Contributed by: Mathew Knepley
546   */
547   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
548   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
549   if (diag && (l->A->M == l->A->N)) {
550     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
551   } else if (diag) {
552     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
553     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
554       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
555 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
556     }
557     for (i = 0; i < slen; i++) {
558       row  = lrows[i] + rstart;
559       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
560     }
561     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
562     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
563   } else {
564     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
565   }
566   ierr = ISDestroy(istmp);CHKERRQ(ierr);
567   ierr = PetscFree(lrows);CHKERRQ(ierr);
568 
569   /* wait on sends */
570   if (nsends) {
571     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
572     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
573     ierr = PetscFree(send_status);CHKERRQ(ierr);
574   }
575   ierr = PetscFree(send_waits);CHKERRQ(ierr);
576   ierr = PetscFree(svalues);CHKERRQ(ierr);
577 
578   PetscFunctionReturn(0);
579 }
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatMult_MPIAIJ"
583 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
584 {
585   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
586   PetscErrorCode ierr;
587   PetscInt       nt;
588 
589   PetscFunctionBegin;
590   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
591   if (nt != A->n) {
592     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
593   }
594   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
595   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
596   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
597   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
598   PetscFunctionReturn(0);
599 }
600 
601 #undef __FUNCT__
602 #define __FUNCT__ "MatMultAdd_MPIAIJ"
603 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
604 {
605   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
606   PetscErrorCode ierr;
607 
608   PetscFunctionBegin;
609   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
610   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
611   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
612   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
613   PetscFunctionReturn(0);
614 }
615 
616 #undef __FUNCT__
617 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
618 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
619 {
620   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
621   PetscErrorCode ierr;
622 
623   PetscFunctionBegin;
624   /* do nondiagonal part */
625   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
626   /* send it on its way */
627   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
628   /* do local part */
629   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
630   /* receive remote parts: note this assumes the values are not actually */
631   /* inserted in yy until the next line, which is true for my implementation*/
632   /* but is not perhaps always true. */
633   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
634   PetscFunctionReturn(0);
635 }
636 
637 EXTERN_C_BEGIN
638 #undef __FUNCT__
639 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
640 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f)
641 {
642   MPI_Comm       comm;
643   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
644   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
645   IS             Me,Notme;
646   PetscErrorCode ierr;
647   PetscInt       M,N,first,last,*notme,i;
648   PetscMPIInt    size;
649 
650   PetscFunctionBegin;
651 
652   /* Easy test: symmetric diagonal block */
653   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
654   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
655   if (!*f) PetscFunctionReturn(0);
656   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
657   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
658   if (size == 1) PetscFunctionReturn(0);
659 
660   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
661   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
662   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
663   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
664   for (i=0; i<first; i++) notme[i] = i;
665   for (i=last; i<M; i++) notme[i-last+first] = i;
666   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
667   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
668   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
669   Aoff = Aoffs[0];
670   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
671   Boff = Boffs[0];
672   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
673   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
674   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
675   ierr = ISDestroy(Me);CHKERRQ(ierr);
676   ierr = ISDestroy(Notme);CHKERRQ(ierr);
677 
678   PetscFunctionReturn(0);
679 }
680 EXTERN_C_END
681 
682 #undef __FUNCT__
683 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
684 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
685 {
686   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
687   PetscErrorCode ierr;
688 
689   PetscFunctionBegin;
690   /* do nondiagonal part */
691   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
692   /* send it on its way */
693   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
694   /* do local part */
695   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
696   /* receive remote parts: note this assumes the values are not actually */
697   /* inserted in yy until the next line, which is true for my implementation*/
698   /* but is not perhaps always true. */
699   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 /*
704   This only works correctly for square matrices where the subblock A->A is the
705    diagonal block
706 */
707 #undef __FUNCT__
708 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
709 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
710 {
711   PetscErrorCode ierr;
712   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
713 
714   PetscFunctionBegin;
715   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
716   if (a->rstart != a->cstart || a->rend != a->cend) {
717     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
718   }
719   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatScale_MPIAIJ"
725 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
726 {
727   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
728   PetscErrorCode ierr;
729 
730   PetscFunctionBegin;
731   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
732   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
733   PetscFunctionReturn(0);
734 }
735 
736 #undef __FUNCT__
737 #define __FUNCT__ "MatDestroy_MPIAIJ"
738 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
739 {
740   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
741   PetscErrorCode ierr;
742 
743   PetscFunctionBegin;
744 #if defined(PETSC_USE_LOG)
745   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
746 #endif
747   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
748   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
749   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
750   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
751 #if defined (PETSC_USE_CTABLE)
752   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
753 #else
754   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
755 #endif
756   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
757   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
758   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
759   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
760   ierr = PetscFree(aij);CHKERRQ(ierr);
761 
762   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
763   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
764   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
765   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
766   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
767   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
768   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
769   PetscFunctionReturn(0);
770 }
771 
772 #undef __FUNCT__
773 #define __FUNCT__ "MatView_MPIAIJ_Binary"
774 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
775 {
776   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
777   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
778   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
779   PetscErrorCode    ierr;
780   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
781   int               fd;
782   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
783   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
784   PetscScalar       *column_values;
785 
786   PetscFunctionBegin;
787   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
788   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
789   nz   = A->nz + B->nz;
790   if (!rank) {
791     header[0] = MAT_FILE_COOKIE;
792     header[1] = mat->M;
793     header[2] = mat->N;
794     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
795     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
796     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
797     /* get largest number of rows any processor has */
798     rlen = mat->m;
799     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
800     for (i=1; i<size; i++) {
801       rlen = PetscMax(rlen,range[i+1] - range[i]);
802     }
803   } else {
804     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
805     rlen = mat->m;
806   }
807 
808   /* load up the local row counts */
809   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
810   for (i=0; i<mat->m; i++) {
811     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
812   }
813 
814   /* store the row lengths to the file */
815   if (!rank) {
816     MPI_Status status;
817     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
818     for (i=1; i<size; i++) {
819       rlen = range[i+1] - range[i];
820       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
822     }
823   } else {
824     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
827 
828   /* load up the local column indices */
829   nzmax = nz; /* )th processor needs space a largest processor needs */
830   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
831   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
832   cnt  = 0;
833   for (i=0; i<mat->m; i++) {
834     for (j=B->i[i]; j<B->i[i+1]; j++) {
835       if ( (col = garray[B->j[j]]) > cstart) break;
836       column_indices[cnt++] = col;
837     }
838     for (k=A->i[i]; k<A->i[i+1]; k++) {
839       column_indices[cnt++] = A->j[k] + cstart;
840     }
841     for (; j<B->i[i+1]; j++) {
842       column_indices[cnt++] = garray[B->j[j]];
843     }
844   }
845   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
846 
847   /* store the column indices to the file */
848   if (!rank) {
849     MPI_Status status;
850     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
851     for (i=1; i<size; i++) {
852       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
853       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
854       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
856     }
857   } else {
858     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
859     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860   }
861   ierr = PetscFree(column_indices);CHKERRQ(ierr);
862 
863   /* load up the local column values */
864   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
865   cnt  = 0;
866   for (i=0; i<mat->m; i++) {
867     for (j=B->i[i]; j<B->i[i+1]; j++) {
868       if ( garray[B->j[j]] > cstart) break;
869       column_values[cnt++] = B->a[j];
870     }
871     for (k=A->i[i]; k<A->i[i+1]; k++) {
872       column_values[cnt++] = A->a[k];
873     }
874     for (; j<B->i[i+1]; j++) {
875       column_values[cnt++] = B->a[j];
876     }
877   }
878   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
879 
880   /* store the column values to the file */
881   if (!rank) {
882     MPI_Status status;
883     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
884     for (i=1; i<size; i++) {
885       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
886       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
887       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
888       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
889     }
890   } else {
891     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
892     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
893   }
894   ierr = PetscFree(column_values);CHKERRQ(ierr);
895   PetscFunctionReturn(0);
896 }
897 
898 #undef __FUNCT__
899 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
900 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
901 {
902   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
903   PetscErrorCode    ierr;
904   PetscMPIInt       rank = aij->rank,size = aij->size;
905   PetscTruth        isdraw,iascii,flg,isbinary;
906   PetscViewer       sviewer;
907   PetscViewerFormat format;
908 
909   PetscFunctionBegin;
910   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
911   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
912   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
913   if (iascii) {
914     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
915     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
916       MatInfo info;
917       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
918       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
919       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
920       if (flg) {
921         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
922 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
923       } else {
924         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
925 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
926       }
927       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
928       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
929       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
930       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
931       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
932       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
933       PetscFunctionReturn(0);
934     } else if (format == PETSC_VIEWER_ASCII_INFO) {
935       PetscFunctionReturn(0);
936     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
937       PetscFunctionReturn(0);
938     }
939   } else if (isbinary) {
940     if (size == 1) {
941       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
942       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
943     } else {
944       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
945     }
946     PetscFunctionReturn(0);
947   } else if (isdraw) {
948     PetscDraw  draw;
949     PetscTruth isnull;
950     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
951     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
952   }
953 
954   if (size == 1) {
955     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
956     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
957   } else {
958     /* assemble the entire matrix onto first processor. */
959     Mat         A;
960     Mat_SeqAIJ  *Aloc;
961     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
962     PetscScalar *a;
963 
964     if (!rank) {
965       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
966     } else {
967       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
968     }
969     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
970     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
971     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
972     PetscLogObjectParent(mat,A);
973 
974     /* copy over the A part */
975     Aloc = (Mat_SeqAIJ*)aij->A->data;
976     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
977     row = aij->rstart;
978     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
979     for (i=0; i<m; i++) {
980       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
981       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
982     }
983     aj = Aloc->j;
984     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
985 
986     /* copy over the B part */
987     Aloc = (Mat_SeqAIJ*)aij->B->data;
988     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
989     row  = aij->rstart;
990     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
991     ct   = cols;
992     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
993     for (i=0; i<m; i++) {
994       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
995       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
996     }
997     ierr = PetscFree(ct);CHKERRQ(ierr);
998     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
999     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1000     /*
1001        Everyone has to call to draw the matrix since the graphics waits are
1002        synchronized across all processors that share the PetscDraw object
1003     */
1004     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1005     if (!rank) {
1006       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1007       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1008     }
1009     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1010     ierr = MatDestroy(A);CHKERRQ(ierr);
1011   }
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatView_MPIAIJ"
1017 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1018 {
1019   PetscErrorCode ierr;
1020   PetscTruth     iascii,isdraw,issocket,isbinary;
1021 
1022   PetscFunctionBegin;
1023   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1024   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1025   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1026   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1027   if (iascii || isdraw || isbinary || issocket) {
1028     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1029   } else {
1030     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1031   }
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatRelax_MPIAIJ"
1039 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1040 {
1041   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1042   PetscErrorCode ierr;
1043   Vec            bb1;
1044   PetscScalar    mone=-1.0;
1045 
1046   PetscFunctionBegin;
1047   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1048 
1049   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1050 
1051   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1052     if (flag & SOR_ZERO_INITIAL_GUESS) {
1053       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1054       its--;
1055     }
1056 
1057     while (its--) {
1058       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1059       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1060 
1061       /* update rhs: bb1 = bb - B*x */
1062       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1063       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1064 
1065       /* local sweep */
1066       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1067       CHKERRQ(ierr);
1068     }
1069   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1070     if (flag & SOR_ZERO_INITIAL_GUESS) {
1071       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1072       its--;
1073     }
1074     while (its--) {
1075       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1076       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1077 
1078       /* update rhs: bb1 = bb - B*x */
1079       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1080       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1081 
1082       /* local sweep */
1083       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1084       CHKERRQ(ierr);
1085     }
1086   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1087     if (flag & SOR_ZERO_INITIAL_GUESS) {
1088       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1089       its--;
1090     }
1091     while (its--) {
1092       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1093       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1094 
1095       /* update rhs: bb1 = bb - B*x */
1096       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1097       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1098 
1099       /* local sweep */
1100       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1101       CHKERRQ(ierr);
1102     }
1103   } else {
1104     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1105   }
1106 
1107   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 #undef __FUNCT__
1112 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1113 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1114 {
1115   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1116   Mat            A = mat->A,B = mat->B;
1117   PetscErrorCode ierr;
1118   PetscReal      isend[5],irecv[5];
1119 
1120   PetscFunctionBegin;
1121   info->block_size     = 1.0;
1122   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1123   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1124   isend[3] = info->memory;  isend[4] = info->mallocs;
1125   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1126   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1127   isend[3] += info->memory;  isend[4] += info->mallocs;
1128   if (flag == MAT_LOCAL) {
1129     info->nz_used      = isend[0];
1130     info->nz_allocated = isend[1];
1131     info->nz_unneeded  = isend[2];
1132     info->memory       = isend[3];
1133     info->mallocs      = isend[4];
1134   } else if (flag == MAT_GLOBAL_MAX) {
1135     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1136     info->nz_used      = irecv[0];
1137     info->nz_allocated = irecv[1];
1138     info->nz_unneeded  = irecv[2];
1139     info->memory       = irecv[3];
1140     info->mallocs      = irecv[4];
1141   } else if (flag == MAT_GLOBAL_SUM) {
1142     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1143     info->nz_used      = irecv[0];
1144     info->nz_allocated = irecv[1];
1145     info->nz_unneeded  = irecv[2];
1146     info->memory       = irecv[3];
1147     info->mallocs      = irecv[4];
1148   }
1149   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1150   info->fill_ratio_needed = 0;
1151   info->factor_mallocs    = 0;
1152   info->rows_global       = (double)matin->M;
1153   info->columns_global    = (double)matin->N;
1154   info->rows_local        = (double)matin->m;
1155   info->columns_local     = (double)matin->N;
1156 
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "MatSetOption_MPIAIJ"
1162 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1163 {
1164   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1165   PetscErrorCode ierr;
1166 
1167   PetscFunctionBegin;
1168   switch (op) {
1169   case MAT_NO_NEW_NONZERO_LOCATIONS:
1170   case MAT_YES_NEW_NONZERO_LOCATIONS:
1171   case MAT_COLUMNS_UNSORTED:
1172   case MAT_COLUMNS_SORTED:
1173   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1174   case MAT_KEEP_ZEROED_ROWS:
1175   case MAT_NEW_NONZERO_LOCATION_ERR:
1176   case MAT_USE_INODES:
1177   case MAT_DO_NOT_USE_INODES:
1178   case MAT_IGNORE_ZERO_ENTRIES:
1179     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1180     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1181     break;
1182   case MAT_ROW_ORIENTED:
1183     a->roworiented = PETSC_TRUE;
1184     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1185     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1186     break;
1187   case MAT_ROWS_SORTED:
1188   case MAT_ROWS_UNSORTED:
1189   case MAT_YES_NEW_DIAGONALS:
1190     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1191     break;
1192   case MAT_COLUMN_ORIENTED:
1193     a->roworiented = PETSC_FALSE;
1194     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1195     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1196     break;
1197   case MAT_IGNORE_OFF_PROC_ENTRIES:
1198     a->donotstash = PETSC_TRUE;
1199     break;
1200   case MAT_NO_NEW_DIAGONALS:
1201     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1202   case MAT_SYMMETRIC:
1203   case MAT_STRUCTURALLY_SYMMETRIC:
1204   case MAT_HERMITIAN:
1205   case MAT_SYMMETRY_ETERNAL:
1206     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1207     break;
1208   case MAT_NOT_SYMMETRIC:
1209   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1210   case MAT_NOT_HERMITIAN:
1211   case MAT_NOT_SYMMETRY_ETERNAL:
1212     break;
1213   default:
1214     SETERRQ(PETSC_ERR_SUP,"unknown option");
1215   }
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatGetRow_MPIAIJ"
1221 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1222 {
1223   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1224   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1225   PetscErrorCode ierr;
1226   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1227   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1228   PetscInt       *cmap,*idx_p;
1229 
1230   PetscFunctionBegin;
1231   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1232   mat->getrowactive = PETSC_TRUE;
1233 
1234   if (!mat->rowvalues && (idx || v)) {
1235     /*
1236         allocate enough space to hold information from the longest row.
1237     */
1238     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1239     PetscInt     max = 1,tmp;
1240     for (i=0; i<matin->m; i++) {
1241       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1242       if (max < tmp) { max = tmp; }
1243     }
1244     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1245     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1246   }
1247 
1248   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1249   lrow = row - rstart;
1250 
1251   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1252   if (!v)   {pvA = 0; pvB = 0;}
1253   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1254   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1255   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1256   nztot = nzA + nzB;
1257 
1258   cmap  = mat->garray;
1259   if (v  || idx) {
1260     if (nztot) {
1261       /* Sort by increasing column numbers, assuming A and B already sorted */
1262       PetscInt imark = -1;
1263       if (v) {
1264         *v = v_p = mat->rowvalues;
1265         for (i=0; i<nzB; i++) {
1266           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1267           else break;
1268         }
1269         imark = i;
1270         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1271         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1272       }
1273       if (idx) {
1274         *idx = idx_p = mat->rowindices;
1275         if (imark > -1) {
1276           for (i=0; i<imark; i++) {
1277             idx_p[i] = cmap[cworkB[i]];
1278           }
1279         } else {
1280           for (i=0; i<nzB; i++) {
1281             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1282             else break;
1283           }
1284           imark = i;
1285         }
1286         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1287         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1288       }
1289     } else {
1290       if (idx) *idx = 0;
1291       if (v)   *v   = 0;
1292     }
1293   }
1294   *nz = nztot;
1295   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1296   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 #undef __FUNCT__
1301 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1302 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1303 {
1304   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1305 
1306   PetscFunctionBegin;
1307   if (!aij->getrowactive) {
1308     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1309   }
1310   aij->getrowactive = PETSC_FALSE;
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 #undef __FUNCT__
1315 #define __FUNCT__ "MatNorm_MPIAIJ"
1316 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1317 {
1318   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1319   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1320   PetscErrorCode ierr;
1321   PetscInt       i,j,cstart = aij->cstart;
1322   PetscReal      sum = 0.0;
1323   PetscScalar    *v;
1324 
1325   PetscFunctionBegin;
1326   if (aij->size == 1) {
1327     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1328   } else {
1329     if (type == NORM_FROBENIUS) {
1330       v = amat->a;
1331       for (i=0; i<amat->nz; i++) {
1332 #if defined(PETSC_USE_COMPLEX)
1333         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1334 #else
1335         sum += (*v)*(*v); v++;
1336 #endif
1337       }
1338       v = bmat->a;
1339       for (i=0; i<bmat->nz; i++) {
1340 #if defined(PETSC_USE_COMPLEX)
1341         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1342 #else
1343         sum += (*v)*(*v); v++;
1344 #endif
1345       }
1346       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1347       *norm = sqrt(*norm);
1348     } else if (type == NORM_1) { /* max column norm */
1349       PetscReal *tmp,*tmp2;
1350       PetscInt    *jj,*garray = aij->garray;
1351       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1352       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1353       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1354       *norm = 0.0;
1355       v = amat->a; jj = amat->j;
1356       for (j=0; j<amat->nz; j++) {
1357         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1358       }
1359       v = bmat->a; jj = bmat->j;
1360       for (j=0; j<bmat->nz; j++) {
1361         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1362       }
1363       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1364       for (j=0; j<mat->N; j++) {
1365         if (tmp2[j] > *norm) *norm = tmp2[j];
1366       }
1367       ierr = PetscFree(tmp);CHKERRQ(ierr);
1368       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1369     } else if (type == NORM_INFINITY) { /* max row norm */
1370       PetscReal ntemp = 0.0;
1371       for (j=0; j<aij->A->m; j++) {
1372         v = amat->a + amat->i[j];
1373         sum = 0.0;
1374         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1375           sum += PetscAbsScalar(*v); v++;
1376         }
1377         v = bmat->a + bmat->i[j];
1378         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1379           sum += PetscAbsScalar(*v); v++;
1380         }
1381         if (sum > ntemp) ntemp = sum;
1382       }
1383       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1384     } else {
1385       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1386     }
1387   }
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 #undef __FUNCT__
1392 #define __FUNCT__ "MatTranspose_MPIAIJ"
1393 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1394 {
1395   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1396   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1397   PetscErrorCode ierr;
1398   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1399   Mat            B;
1400   PetscScalar    *array;
1401 
1402   PetscFunctionBegin;
1403   if (!matout && M != N) {
1404     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1405   }
1406 
1407   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1408   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1409   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1410 
1411   /* copy over the A part */
1412   Aloc = (Mat_SeqAIJ*)a->A->data;
1413   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1414   row = a->rstart;
1415   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1416   for (i=0; i<m; i++) {
1417     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1418     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1419   }
1420   aj = Aloc->j;
1421   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1422 
1423   /* copy over the B part */
1424   Aloc = (Mat_SeqAIJ*)a->B->data;
1425   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1426   row  = a->rstart;
1427   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1428   ct   = cols;
1429   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1430   for (i=0; i<m; i++) {
1431     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1432     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1433   }
1434   ierr = PetscFree(ct);CHKERRQ(ierr);
1435   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1436   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1437   if (matout) {
1438     *matout = B;
1439   } else {
1440     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1441   }
1442   PetscFunctionReturn(0);
1443 }
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1447 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1448 {
1449   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1450   Mat            a = aij->A,b = aij->B;
1451   PetscErrorCode ierr;
1452   PetscInt       s1,s2,s3;
1453 
1454   PetscFunctionBegin;
1455   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1456   if (rr) {
1457     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1458     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1459     /* Overlap communication with computation. */
1460     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1461   }
1462   if (ll) {
1463     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1464     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1465     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1466   }
1467   /* scale  the diagonal block */
1468   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1469 
1470   if (rr) {
1471     /* Do a scatter end and then right scale the off-diagonal block */
1472     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1473     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1474   }
1475 
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1482 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1483 {
1484   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1485   PetscErrorCode ierr;
1486 
1487   PetscFunctionBegin;
1488   if (!a->rank) {
1489     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1490   }
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1496 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1497 {
1498   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1499   PetscErrorCode ierr;
1500 
1501   PetscFunctionBegin;
1502   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1503   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 #undef __FUNCT__
1507 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1508 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1509 {
1510   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1511   PetscErrorCode ierr;
1512 
1513   PetscFunctionBegin;
1514   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatEqual_MPIAIJ"
1520 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1521 {
1522   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1523   Mat            a,b,c,d;
1524   PetscTruth     flg;
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   a = matA->A; b = matA->B;
1529   c = matB->A; d = matB->B;
1530 
1531   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1532   if (flg) {
1533     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1534   }
1535   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1536   PetscFunctionReturn(0);
1537 }
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatCopy_MPIAIJ"
1541 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1542 {
1543   PetscErrorCode ierr;
1544   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1545   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1546 
1547   PetscFunctionBegin;
1548   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1549   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1550     /* because of the column compression in the off-processor part of the matrix a->B,
1551        the number of columns in a->B and b->B may be different, hence we cannot call
1552        the MatCopy() directly on the two parts. If need be, we can provide a more
1553        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1554        then copying the submatrices */
1555     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1556   } else {
1557     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1558     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1559   }
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1565 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1566 {
1567   PetscErrorCode ierr;
1568 
1569   PetscFunctionBegin;
1570   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #include "petscblaslapack.h"
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatAXPY_MPIAIJ"
1577 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1578 {
1579   PetscErrorCode ierr;
1580   PetscInt       i;
1581   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1582   PetscBLASInt   bnz,one=1;
1583   Mat_SeqAIJ     *x,*y;
1584 
1585   PetscFunctionBegin;
1586   if (str == SAME_NONZERO_PATTERN) {
1587     x = (Mat_SeqAIJ *)xx->A->data;
1588     y = (Mat_SeqAIJ *)yy->A->data;
1589     bnz = (PetscBLASInt)x->nz;
1590     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1591     x = (Mat_SeqAIJ *)xx->B->data;
1592     y = (Mat_SeqAIJ *)yy->B->data;
1593     bnz = (PetscBLASInt)x->nz;
1594     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1595   } else if (str == SUBSET_NONZERO_PATTERN) {
1596     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1597 
1598     x = (Mat_SeqAIJ *)xx->B->data;
1599     y = (Mat_SeqAIJ *)yy->B->data;
1600     if (y->xtoy && y->XtoY != xx->B) {
1601       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1602       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1603     }
1604     if (!y->xtoy) { /* get xtoy */
1605       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1606       y->XtoY = xx->B;
1607     }
1608     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1609   } else {
1610     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1611   }
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 /* -------------------------------------------------------------------*/
1616 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1617        MatGetRow_MPIAIJ,
1618        MatRestoreRow_MPIAIJ,
1619        MatMult_MPIAIJ,
1620 /* 4*/ MatMultAdd_MPIAIJ,
1621        MatMultTranspose_MPIAIJ,
1622        MatMultTransposeAdd_MPIAIJ,
1623        0,
1624        0,
1625        0,
1626 /*10*/ 0,
1627        0,
1628        0,
1629        MatRelax_MPIAIJ,
1630        MatTranspose_MPIAIJ,
1631 /*15*/ MatGetInfo_MPIAIJ,
1632        MatEqual_MPIAIJ,
1633        MatGetDiagonal_MPIAIJ,
1634        MatDiagonalScale_MPIAIJ,
1635        MatNorm_MPIAIJ,
1636 /*20*/ MatAssemblyBegin_MPIAIJ,
1637        MatAssemblyEnd_MPIAIJ,
1638        0,
1639        MatSetOption_MPIAIJ,
1640        MatZeroEntries_MPIAIJ,
1641 /*25*/ MatZeroRows_MPIAIJ,
1642        0,
1643        0,
1644        0,
1645        0,
1646 /*30*/ MatSetUpPreallocation_MPIAIJ,
1647        0,
1648        0,
1649        0,
1650        0,
1651 /*35*/ MatDuplicate_MPIAIJ,
1652        0,
1653        0,
1654        0,
1655        0,
1656 /*40*/ MatAXPY_MPIAIJ,
1657        MatGetSubMatrices_MPIAIJ,
1658        MatIncreaseOverlap_MPIAIJ,
1659        MatGetValues_MPIAIJ,
1660        MatCopy_MPIAIJ,
1661 /*45*/ MatPrintHelp_MPIAIJ,
1662        MatScale_MPIAIJ,
1663        0,
1664        0,
1665        0,
1666 /*50*/ MatSetBlockSize_MPIAIJ,
1667        0,
1668        0,
1669        0,
1670        0,
1671 /*55*/ MatFDColoringCreate_MPIAIJ,
1672        0,
1673        MatSetUnfactored_MPIAIJ,
1674        0,
1675        0,
1676 /*60*/ MatGetSubMatrix_MPIAIJ,
1677        MatDestroy_MPIAIJ,
1678        MatView_MPIAIJ,
1679        MatGetPetscMaps_Petsc,
1680        0,
1681 /*65*/ 0,
1682        0,
1683        0,
1684        0,
1685        0,
1686 /*70*/ 0,
1687        0,
1688        MatSetColoring_MPIAIJ,
1689 #if defined(PETSC_HAVE_ADIC)
1690        MatSetValuesAdic_MPIAIJ,
1691 #else
1692        0,
1693 #endif
1694        MatSetValuesAdifor_MPIAIJ,
1695 /*75*/ 0,
1696        0,
1697        0,
1698        0,
1699        0,
1700 /*80*/ 0,
1701        0,
1702        0,
1703        0,
1704 /*84*/ MatLoad_MPIAIJ,
1705        0,
1706        0,
1707        0,
1708        0,
1709        0,
1710 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1711        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1712        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1713        MatPtAP_MPIAIJ_MPIAIJ,
1714        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1715 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1716        0,
1717        0,
1718        0};
1719 
1720 /* ----------------------------------------------------------------------------------------*/
1721 
1722 EXTERN_C_BEGIN
1723 #undef __FUNCT__
1724 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1725 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1726 {
1727   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1732   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1733   PetscFunctionReturn(0);
1734 }
1735 EXTERN_C_END
1736 
1737 EXTERN_C_BEGIN
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1740 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1741 {
1742   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1743   PetscErrorCode ierr;
1744 
1745   PetscFunctionBegin;
1746   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1747   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1748   PetscFunctionReturn(0);
1749 }
1750 EXTERN_C_END
1751 
1752 #include "petscpc.h"
1753 EXTERN_C_BEGIN
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1756 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1757 {
1758   Mat_MPIAIJ     *b;
1759   PetscErrorCode ierr;
1760   PetscInt       i;
1761 
1762   PetscFunctionBegin;
1763   B->preallocated = PETSC_TRUE;
1764   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1765   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1766   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1767   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1768   if (d_nnz) {
1769     for (i=0; i<B->m; i++) {
1770       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1771     }
1772   }
1773   if (o_nnz) {
1774     for (i=0; i<B->m; i++) {
1775       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1776     }
1777   }
1778   b = (Mat_MPIAIJ*)B->data;
1779   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1780   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1781 
1782   PetscFunctionReturn(0);
1783 }
1784 EXTERN_C_END
1785 
1786 /*MC
1787    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1788 
1789    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1790    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1791   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1792   for communicators controlling multiple processes.  It is recommended that you call both of
1793   the above preallocation routines for simplicity.
1794 
1795    Options Database Keys:
1796 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1797 
1798   Level: beginner
1799 
1800 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1801 M*/
1802 
1803 EXTERN_C_BEGIN
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatCreate_AIJ"
1806 PetscErrorCode MatCreate_AIJ(Mat A)
1807 {
1808   PetscErrorCode ierr;
1809   PetscMPIInt    size;
1810 
1811   PetscFunctionBegin;
1812   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1813   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1814   if (size == 1) {
1815     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1816   } else {
1817     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1818   }
1819   PetscFunctionReturn(0);
1820 }
1821 EXTERN_C_END
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1825 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1826 {
1827   Mat            mat;
1828   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1829   PetscErrorCode ierr;
1830 
1831   PetscFunctionBegin;
1832   *newmat       = 0;
1833   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1834   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1835   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1836   a    = (Mat_MPIAIJ*)mat->data;
1837 
1838   mat->factor       = matin->factor;
1839   mat->bs           = matin->bs;
1840   mat->assembled    = PETSC_TRUE;
1841   mat->insertmode   = NOT_SET_VALUES;
1842   mat->preallocated = PETSC_TRUE;
1843 
1844   a->rstart       = oldmat->rstart;
1845   a->rend         = oldmat->rend;
1846   a->cstart       = oldmat->cstart;
1847   a->cend         = oldmat->cend;
1848   a->size         = oldmat->size;
1849   a->rank         = oldmat->rank;
1850   a->donotstash   = oldmat->donotstash;
1851   a->roworiented  = oldmat->roworiented;
1852   a->rowindices   = 0;
1853   a->rowvalues    = 0;
1854   a->getrowactive = PETSC_FALSE;
1855 
1856   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1857   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1858   if (oldmat->colmap) {
1859 #if defined (PETSC_USE_CTABLE)
1860     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1861 #else
1862     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1863     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1864     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1865 #endif
1866   } else a->colmap = 0;
1867   if (oldmat->garray) {
1868     PetscInt len;
1869     len  = oldmat->B->n;
1870     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1871     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1872     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1873   } else a->garray = 0;
1874 
1875   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1876   PetscLogObjectParent(mat,a->lvec);
1877   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1878   PetscLogObjectParent(mat,a->Mvctx);
1879   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1880   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1881   PetscLogObjectParent(mat,a->A);
1882   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1883   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1884   PetscLogObjectParent(mat,a->B);
1885   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1886   *newmat = mat;
1887   PetscFunctionReturn(0);
1888 }
1889 
1890 #include "petscsys.h"
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatLoad_MPIAIJ"
1894 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1895 {
1896   Mat            A;
1897   PetscScalar    *vals,*svals;
1898   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1899   MPI_Status     status;
1900   PetscErrorCode ierr;
1901   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1902   PetscInt       i,nz,j,rstart,rend;
1903   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1904   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1905   PetscInt       cend,cstart,n,*rowners;
1906   int            fd;
1907 
1908   PetscFunctionBegin;
1909   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1910   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1911   if (!rank) {
1912     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1913     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1914     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1915   }
1916 
1917   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1918   M = header[1]; N = header[2];
1919   /* determine ownership of all rows */
1920   m    = M/size + ((M % size) > rank);
1921   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1922   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1923   rowners[0] = 0;
1924   for (i=2; i<=size; i++) {
1925     rowners[i] += rowners[i-1];
1926   }
1927   rstart = rowners[rank];
1928   rend   = rowners[rank+1];
1929 
1930   /* distribute row lengths to all processors */
1931   ierr    = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
1932   if (!rank) {
1933     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1934     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1935     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1936     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1937     for (j=0; j<m; j++) {
1938       procsnz[0] += ourlens[j];
1939     }
1940     for (i=1; i<size; i++) {
1941       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1942       /* calculate the number of nonzeros on each processor */
1943       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1944         procsnz[i] += rowlengths[j];
1945       }
1946       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1947     }
1948     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1949   } else {
1950     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1951   }
1952 
1953   if (!rank) {
1954     /* determine max buffer needed and allocate it */
1955     maxnz = 0;
1956     for (i=0; i<size; i++) {
1957       maxnz = PetscMax(maxnz,procsnz[i]);
1958     }
1959     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1960 
1961     /* read in my part of the matrix column indices  */
1962     nz   = procsnz[0];
1963     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1964     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1965 
1966     /* read in every one elses and ship off */
1967     for (i=1; i<size; i++) {
1968       nz   = procsnz[i];
1969       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1970       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1971     }
1972     ierr = PetscFree(cols);CHKERRQ(ierr);
1973   } else {
1974     /* determine buffer space needed for message */
1975     nz = 0;
1976     for (i=0; i<m; i++) {
1977       nz += ourlens[i];
1978     }
1979     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1980 
1981     /* receive message of column indices*/
1982     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1983     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1984     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1985   }
1986 
1987   /* determine column ownership if matrix is not square */
1988   if (N != M) {
1989     n      = N/size + ((N % size) > rank);
1990     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1991     cstart = cend - n;
1992   } else {
1993     cstart = rstart;
1994     cend   = rend;
1995     n      = cend - cstart;
1996   }
1997 
1998   /* loop over local rows, determining number of off diagonal entries */
1999   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2000   jj = 0;
2001   for (i=0; i<m; i++) {
2002     for (j=0; j<ourlens[i]; j++) {
2003       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2004       jj++;
2005     }
2006   }
2007 
2008   /* create our matrix */
2009   for (i=0; i<m; i++) {
2010     ourlens[i] -= offlens[i];
2011   }
2012   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2013   ierr = MatSetType(A,type);CHKERRQ(ierr);
2014   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2015 
2016   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2017   for (i=0; i<m; i++) {
2018     ourlens[i] += offlens[i];
2019   }
2020 
2021   if (!rank) {
2022     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2023 
2024     /* read in my part of the matrix numerical values  */
2025     nz   = procsnz[0];
2026     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2027 
2028     /* insert into matrix */
2029     jj      = rstart;
2030     smycols = mycols;
2031     svals   = vals;
2032     for (i=0; i<m; i++) {
2033       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2034       smycols += ourlens[i];
2035       svals   += ourlens[i];
2036       jj++;
2037     }
2038 
2039     /* read in other processors and ship out */
2040     for (i=1; i<size; i++) {
2041       nz   = procsnz[i];
2042       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2043       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2044     }
2045     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2046   } else {
2047     /* receive numeric values */
2048     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2049 
2050     /* receive message of values*/
2051     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2052     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2053     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2054 
2055     /* insert into matrix */
2056     jj      = rstart;
2057     smycols = mycols;
2058     svals   = vals;
2059     for (i=0; i<m; i++) {
2060       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2061       smycols += ourlens[i];
2062       svals   += ourlens[i];
2063       jj++;
2064     }
2065   }
2066   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2067   ierr = PetscFree(vals);CHKERRQ(ierr);
2068   ierr = PetscFree(mycols);CHKERRQ(ierr);
2069   ierr = PetscFree(rowners);CHKERRQ(ierr);
2070 
2071   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2072   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2073   *newmat = A;
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2079 /*
2080     Not great since it makes two copies of the submatrix, first an SeqAIJ
2081   in local and then by concatenating the local matrices the end result.
2082   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2083 */
2084 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2085 {
2086   PetscErrorCode ierr;
2087   PetscMPIInt    rank,size;
2088   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2089   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2090   Mat            *local,M,Mreuse;
2091   PetscScalar    *vwork,*aa;
2092   MPI_Comm       comm = mat->comm;
2093   Mat_SeqAIJ     *aij;
2094 
2095 
2096   PetscFunctionBegin;
2097   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2098   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2099 
2100   if (call ==  MAT_REUSE_MATRIX) {
2101     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2102     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2103     local = &Mreuse;
2104     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2105   } else {
2106     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2107     Mreuse = *local;
2108     ierr   = PetscFree(local);CHKERRQ(ierr);
2109   }
2110 
2111   /*
2112       m - number of local rows
2113       n - number of columns (same on all processors)
2114       rstart - first row in new global matrix generated
2115   */
2116   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2117   if (call == MAT_INITIAL_MATRIX) {
2118     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2119     ii  = aij->i;
2120     jj  = aij->j;
2121 
2122     /*
2123         Determine the number of non-zeros in the diagonal and off-diagonal
2124         portions of the matrix in order to do correct preallocation
2125     */
2126 
2127     /* first get start and end of "diagonal" columns */
2128     if (csize == PETSC_DECIDE) {
2129       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2130       if (mglobal == n) { /* square matrix */
2131 	nlocal = m;
2132       } else {
2133         nlocal = n/size + ((n % size) > rank);
2134       }
2135     } else {
2136       nlocal = csize;
2137     }
2138     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2139     rstart = rend - nlocal;
2140     if (rank == size - 1 && rend != n) {
2141       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2142     }
2143 
2144     /* next, compute all the lengths */
2145     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2146     olens = dlens + m;
2147     for (i=0; i<m; i++) {
2148       jend = ii[i+1] - ii[i];
2149       olen = 0;
2150       dlen = 0;
2151       for (j=0; j<jend; j++) {
2152         if (*jj < rstart || *jj >= rend) olen++;
2153         else dlen++;
2154         jj++;
2155       }
2156       olens[i] = olen;
2157       dlens[i] = dlen;
2158     }
2159     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2160     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2161     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2162     ierr = PetscFree(dlens);CHKERRQ(ierr);
2163   } else {
2164     PetscInt ml,nl;
2165 
2166     M = *newmat;
2167     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2168     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2169     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2170     /*
2171          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2172        rather than the slower MatSetValues().
2173     */
2174     M->was_assembled = PETSC_TRUE;
2175     M->assembled     = PETSC_FALSE;
2176   }
2177   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2178   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2179   ii  = aij->i;
2180   jj  = aij->j;
2181   aa  = aij->a;
2182   for (i=0; i<m; i++) {
2183     row   = rstart + i;
2184     nz    = ii[i+1] - ii[i];
2185     cwork = jj;     jj += nz;
2186     vwork = aa;     aa += nz;
2187     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2188   }
2189 
2190   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2191   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2192   *newmat = M;
2193 
2194   /* save submatrix used in processor for next request */
2195   if (call ==  MAT_INITIAL_MATRIX) {
2196     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2197     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2198   }
2199 
2200   PetscFunctionReturn(0);
2201 }
2202 
2203 EXTERN_C_BEGIN
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2206 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2207 {
2208   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2209   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2210   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2211   const PetscInt *JJ;
2212   PetscScalar    *values;
2213   PetscErrorCode ierr;
2214 
2215   PetscFunctionBegin;
2216 #if defined(PETSC_OPT_g)
2217   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2218 #endif
2219   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2220   o_nnz = d_nnz + m;
2221 
2222   for (i=0; i<m; i++) {
2223     nnz     = I[i+1]- I[i];
2224     JJ      = J + I[i];
2225     nnz_max = PetscMax(nnz_max,nnz);
2226 #if defined(PETSC_OPT_g)
2227     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2228 #endif
2229     for (j=0; j<nnz; j++) {
2230       if (*JJ >= cstart) break;
2231       JJ++;
2232     }
2233     d = 0;
2234     for (; j<nnz; j++) {
2235       if (*JJ++ >= cend) break;
2236       d++;
2237     }
2238     d_nnz[i] = d;
2239     o_nnz[i] = nnz - d;
2240   }
2241   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2242   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2243 
2244   if (v) values = (PetscScalar*)v;
2245   else {
2246     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2247     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2248   }
2249 
2250   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2251   for (i=0; i<m; i++) {
2252     ii   = i + rstart;
2253     nnz  = I[i+1]- I[i];
2254     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2255   }
2256   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2257   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2258   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2259 
2260   if (!v) {
2261     ierr = PetscFree(values);CHKERRQ(ierr);
2262   }
2263   PetscFunctionReturn(0);
2264 }
2265 EXTERN_C_END
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2269 /*@C
2270    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2271    (the default parallel PETSc format).
2272 
2273    Collective on MPI_Comm
2274 
2275    Input Parameters:
2276 +  A - the matrix
2277 .  i - the indices into j for the start of each local row (starts with zero)
2278 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2279 -  v - optional values in the matrix
2280 
2281    Level: developer
2282 
2283 .keywords: matrix, aij, compressed row, sparse, parallel
2284 
2285 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2286 @*/
2287 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2288 {
2289   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2290 
2291   PetscFunctionBegin;
2292   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2293   if (f) {
2294     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2295   }
2296   PetscFunctionReturn(0);
2297 }
2298 
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2301 /*@C
2302    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2303    (the default parallel PETSc format).  For good matrix assembly performance
2304    the user should preallocate the matrix storage by setting the parameters
2305    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2306    performance can be increased by more than a factor of 50.
2307 
2308    Collective on MPI_Comm
2309 
2310    Input Parameters:
2311 +  A - the matrix
2312 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2313            (same value is used for all local rows)
2314 .  d_nnz - array containing the number of nonzeros in the various rows of the
2315            DIAGONAL portion of the local submatrix (possibly different for each row)
2316            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2317            The size of this array is equal to the number of local rows, i.e 'm'.
2318            You must leave room for the diagonal entry even if it is zero.
2319 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2320            submatrix (same value is used for all local rows).
2321 -  o_nnz - array containing the number of nonzeros in the various rows of the
2322            OFF-DIAGONAL portion of the local submatrix (possibly different for
2323            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2324            structure. The size of this array is equal to the number
2325            of local rows, i.e 'm'.
2326 
2327    If the *_nnz parameter is given then the *_nz parameter is ignored
2328 
2329    The AIJ format (also called the Yale sparse matrix format or
2330    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2331    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2332 
2333    The parallel matrix is partitioned such that the first m0 rows belong to
2334    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2335    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2336 
2337    The DIAGONAL portion of the local submatrix of a processor can be defined
2338    as the submatrix which is obtained by extraction the part corresponding
2339    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2340    first row that belongs to the processor, and r2 is the last row belonging
2341    to the this processor. This is a square mxm matrix. The remaining portion
2342    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2343 
2344    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2345 
2346    Example usage:
2347 
2348    Consider the following 8x8 matrix with 34 non-zero values, that is
2349    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2350    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2351    as follows:
2352 
2353 .vb
2354             1  2  0  |  0  3  0  |  0  4
2355     Proc0   0  5  6  |  7  0  0  |  8  0
2356             9  0 10  | 11  0  0  | 12  0
2357     -------------------------------------
2358            13  0 14  | 15 16 17  |  0  0
2359     Proc1   0 18  0  | 19 20 21  |  0  0
2360             0  0  0  | 22 23  0  | 24  0
2361     -------------------------------------
2362     Proc2  25 26 27  |  0  0 28  | 29  0
2363            30  0  0  | 31 32 33  |  0 34
2364 .ve
2365 
2366    This can be represented as a collection of submatrices as:
2367 
2368 .vb
2369       A B C
2370       D E F
2371       G H I
2372 .ve
2373 
2374    Where the submatrices A,B,C are owned by proc0, D,E,F are
2375    owned by proc1, G,H,I are owned by proc2.
2376 
2377    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2378    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2379    The 'M','N' parameters are 8,8, and have the same values on all procs.
2380 
2381    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2382    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2383    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2384    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2385    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2386    matrix, ans [DF] as another SeqAIJ matrix.
2387 
2388    When d_nz, o_nz parameters are specified, d_nz storage elements are
2389    allocated for every row of the local diagonal submatrix, and o_nz
2390    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2391    One way to choose d_nz and o_nz is to use the max nonzerors per local
2392    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2393    In this case, the values of d_nz,o_nz are:
2394 .vb
2395      proc0 : dnz = 2, o_nz = 2
2396      proc1 : dnz = 3, o_nz = 2
2397      proc2 : dnz = 1, o_nz = 4
2398 .ve
2399    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2400    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2401    for proc3. i.e we are using 12+15+10=37 storage locations to store
2402    34 values.
2403 
2404    When d_nnz, o_nnz parameters are specified, the storage is specified
2405    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2406    In the above case the values for d_nnz,o_nnz are:
2407 .vb
2408      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2409      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2410      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2411 .ve
2412    Here the space allocated is sum of all the above values i.e 34, and
2413    hence pre-allocation is perfect.
2414 
2415    Level: intermediate
2416 
2417 .keywords: matrix, aij, compressed row, sparse, parallel
2418 
2419 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2420           MPIAIJ
2421 @*/
2422 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2423 {
2424   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2425 
2426   PetscFunctionBegin;
2427   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2428   if (f) {
2429     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2430   }
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 #undef __FUNCT__
2435 #define __FUNCT__ "MatCreateMPIAIJ"
2436 /*@C
2437    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2438    (the default parallel PETSc format).  For good matrix assembly performance
2439    the user should preallocate the matrix storage by setting the parameters
2440    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2441    performance can be increased by more than a factor of 50.
2442 
2443    Collective on MPI_Comm
2444 
2445    Input Parameters:
2446 +  comm - MPI communicator
2447 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2448            This value should be the same as the local size used in creating the
2449            y vector for the matrix-vector product y = Ax.
2450 .  n - This value should be the same as the local size used in creating the
2451        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2452        calculated if N is given) For square matrices n is almost always m.
2453 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2454 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2455 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2456            (same value is used for all local rows)
2457 .  d_nnz - array containing the number of nonzeros in the various rows of the
2458            DIAGONAL portion of the local submatrix (possibly different for each row)
2459            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2460            The size of this array is equal to the number of local rows, i.e 'm'.
2461            You must leave room for the diagonal entry even if it is zero.
2462 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2463            submatrix (same value is used for all local rows).
2464 -  o_nnz - array containing the number of nonzeros in the various rows of the
2465            OFF-DIAGONAL portion of the local submatrix (possibly different for
2466            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2467            structure. The size of this array is equal to the number
2468            of local rows, i.e 'm'.
2469 
2470    Output Parameter:
2471 .  A - the matrix
2472 
2473    Notes:
2474    If the *_nnz parameter is given then the *_nz parameter is ignored
2475 
2476    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2477    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2478    storage requirements for this matrix.
2479 
2480    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2481    processor than it must be used on all processors that share the object for
2482    that argument.
2483 
2484    The user MUST specify either the local or global matrix dimensions
2485    (possibly both).
2486 
2487    The parallel matrix is partitioned such that the first m0 rows belong to
2488    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2489    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2490 
2491    The DIAGONAL portion of the local submatrix of a processor can be defined
2492    as the submatrix which is obtained by extraction the part corresponding
2493    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2494    first row that belongs to the processor, and r2 is the last row belonging
2495    to the this processor. This is a square mxm matrix. The remaining portion
2496    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2497 
2498    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2499 
2500    When calling this routine with a single process communicator, a matrix of
2501    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2502    type of communicator, use the construction mechanism:
2503      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2504 
2505    By default, this format uses inodes (identical nodes) when possible.
2506    We search for consecutive rows with the same nonzero structure, thereby
2507    reusing matrix information to achieve increased efficiency.
2508 
2509    Options Database Keys:
2510 +  -mat_aij_no_inode  - Do not use inodes
2511 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2512 -  -mat_aij_oneindex - Internally use indexing starting at 1
2513         rather than 0.  Note that when calling MatSetValues(),
2514         the user still MUST index entries starting at 0!
2515 
2516 
2517    Example usage:
2518 
2519    Consider the following 8x8 matrix with 34 non-zero values, that is
2520    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2521    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2522    as follows:
2523 
2524 .vb
2525             1  2  0  |  0  3  0  |  0  4
2526     Proc0   0  5  6  |  7  0  0  |  8  0
2527             9  0 10  | 11  0  0  | 12  0
2528     -------------------------------------
2529            13  0 14  | 15 16 17  |  0  0
2530     Proc1   0 18  0  | 19 20 21  |  0  0
2531             0  0  0  | 22 23  0  | 24  0
2532     -------------------------------------
2533     Proc2  25 26 27  |  0  0 28  | 29  0
2534            30  0  0  | 31 32 33  |  0 34
2535 .ve
2536 
2537    This can be represented as a collection of submatrices as:
2538 
2539 .vb
2540       A B C
2541       D E F
2542       G H I
2543 .ve
2544 
2545    Where the submatrices A,B,C are owned by proc0, D,E,F are
2546    owned by proc1, G,H,I are owned by proc2.
2547 
2548    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2549    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2550    The 'M','N' parameters are 8,8, and have the same values on all procs.
2551 
2552    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2553    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2554    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2555    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2556    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2557    matrix, ans [DF] as another SeqAIJ matrix.
2558 
2559    When d_nz, o_nz parameters are specified, d_nz storage elements are
2560    allocated for every row of the local diagonal submatrix, and o_nz
2561    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2562    One way to choose d_nz and o_nz is to use the max nonzerors per local
2563    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2564    In this case, the values of d_nz,o_nz are:
2565 .vb
2566      proc0 : dnz = 2, o_nz = 2
2567      proc1 : dnz = 3, o_nz = 2
2568      proc2 : dnz = 1, o_nz = 4
2569 .ve
2570    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2571    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2572    for proc3. i.e we are using 12+15+10=37 storage locations to store
2573    34 values.
2574 
2575    When d_nnz, o_nnz parameters are specified, the storage is specified
2576    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2577    In the above case the values for d_nnz,o_nnz are:
2578 .vb
2579      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2580      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2581      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2582 .ve
2583    Here the space allocated is sum of all the above values i.e 34, and
2584    hence pre-allocation is perfect.
2585 
2586    Level: intermediate
2587 
2588 .keywords: matrix, aij, compressed row, sparse, parallel
2589 
2590 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2591           MPIAIJ
2592 @*/
2593 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2594 {
2595   PetscErrorCode ierr;
2596   PetscMPIInt    size;
2597 
2598   PetscFunctionBegin;
2599   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2600   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2601   if (size > 1) {
2602     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2603     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2604   } else {
2605     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2606     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2607   }
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 #undef __FUNCT__
2612 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2613 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2614 {
2615   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2616 
2617   PetscFunctionBegin;
2618   *Ad     = a->A;
2619   *Ao     = a->B;
2620   *colmap = a->garray;
2621   PetscFunctionReturn(0);
2622 }
2623 
2624 #undef __FUNCT__
2625 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2626 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2627 {
2628   PetscErrorCode ierr;
2629   PetscInt       i;
2630   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2631 
2632   PetscFunctionBegin;
2633   if (coloring->ctype == IS_COLORING_LOCAL) {
2634     ISColoringValue *allcolors,*colors;
2635     ISColoring      ocoloring;
2636 
2637     /* set coloring for diagonal portion */
2638     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2639 
2640     /* set coloring for off-diagonal portion */
2641     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2642     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2643     for (i=0; i<a->B->n; i++) {
2644       colors[i] = allcolors[a->garray[i]];
2645     }
2646     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2647     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2648     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2649     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2650   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2651     ISColoringValue *colors;
2652     PetscInt             *larray;
2653     ISColoring      ocoloring;
2654 
2655     /* set coloring for diagonal portion */
2656     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2657     for (i=0; i<a->A->n; i++) {
2658       larray[i] = i + a->cstart;
2659     }
2660     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2661     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2662     for (i=0; i<a->A->n; i++) {
2663       colors[i] = coloring->colors[larray[i]];
2664     }
2665     ierr = PetscFree(larray);CHKERRQ(ierr);
2666     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2667     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2668     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2669 
2670     /* set coloring for off-diagonal portion */
2671     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2672     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2673     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2674     for (i=0; i<a->B->n; i++) {
2675       colors[i] = coloring->colors[larray[i]];
2676     }
2677     ierr = PetscFree(larray);CHKERRQ(ierr);
2678     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2679     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2680     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2681   } else {
2682     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2683   }
2684 
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 #if defined(PETSC_HAVE_ADIC)
2689 #undef __FUNCT__
2690 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2691 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2692 {
2693   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2694   PetscErrorCode ierr;
2695 
2696   PetscFunctionBegin;
2697   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2698   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2699   PetscFunctionReturn(0);
2700 }
2701 #endif
2702 
2703 #undef __FUNCT__
2704 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2705 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2706 {
2707   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2708   PetscErrorCode ierr;
2709 
2710   PetscFunctionBegin;
2711   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2712   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatMerge"
2718 /*@C
2719       MatMerge - Creates a single large PETSc matrix by concatinating sequential
2720                  matrices from each processor
2721 
2722     Collective on MPI_Comm
2723 
2724    Input Parameters:
2725 +    comm - the communicators the parallel matrix will live on
2726 .    inmat - the input sequential matrices
2727 .    n - number of local columns (or PETSC_DECIDE)
2728 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2729 
2730    Output Parameter:
2731 .    outmat - the parallel matrix generated
2732 
2733     Level: advanced
2734 
2735    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2736 
2737 @*/
2738 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2739 {
2740   PetscErrorCode ierr;
2741   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2742   PetscInt       *indx;
2743   PetscScalar    *values;
2744   PetscMap       columnmap,rowmap;
2745 
2746   PetscFunctionBegin;
2747     ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2748   /*
2749   PetscMPIInt       rank;
2750   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2751   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2752   */
2753   if (scall == MAT_INITIAL_MATRIX){
2754     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2755     if (n == PETSC_DECIDE){
2756       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2757       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2758       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2759       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2760       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2761     }
2762 
2763     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2764     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2765     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2766     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2767     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2768 
2769     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2770     for (i=0;i<m;i++) {
2771       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2772       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2773       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2774     }
2775     /* This routine will ONLY return MPIAIJ type matrix */
2776     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2777     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2778     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2779     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2780 
2781   } else if (scall == MAT_REUSE_MATRIX){
2782     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2783   } else {
2784     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2785   }
2786 
2787   for (i=0;i<m;i++) {
2788     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2789     I    = i + rstart;
2790     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2791     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2792   }
2793   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2794   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2795   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2796 
2797   PetscFunctionReturn(0);
2798 }
2799 
2800 #undef __FUNCT__
2801 #define __FUNCT__ "MatFileSplit"
2802 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2803 {
2804   PetscErrorCode    ierr;
2805   PetscMPIInt       rank;
2806   PetscInt          m,N,i,rstart,nnz;
2807   size_t            len;
2808   const PetscInt    *indx;
2809   PetscViewer       out;
2810   char              *name;
2811   Mat               B;
2812   const PetscScalar *values;
2813 
2814   PetscFunctionBegin;
2815   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2816   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2817   /* Should this be the type of the diagonal block of A? */
2818   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2819   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2820   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2821   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2822   for (i=0;i<m;i++) {
2823     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2824     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2825     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2826   }
2827   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2828   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2829 
2830   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2831   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2832   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2833   sprintf(name,"%s.%d",outfile,rank);
2834   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2835   ierr = PetscFree(name);
2836   ierr = MatView(B,out);CHKERRQ(ierr);
2837   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2838   ierr = MatDestroy(B);CHKERRQ(ierr);
2839   PetscFunctionReturn(0);
2840 }
2841 
2842 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2843 #undef __FUNCT__
2844 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2845 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2846 {
2847   PetscErrorCode       ierr;
2848   Mat_Merge_SeqsToMPI  *merge;
2849   PetscObjectContainer container;
2850 
2851   PetscFunctionBegin;
2852   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2853   if (container) {
2854     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2855     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2856     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2857     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2858     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2859     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2860     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2861     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2862     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2863     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2864     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2865     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2866 
2867     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2868     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2869   }
2870   ierr = PetscFree(merge);CHKERRQ(ierr);
2871 
2872   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2873   PetscFunctionReturn(0);
2874 }
2875 
2876 #include "src/mat/utils/freespace.h"
2877 #include "petscbt.h"
2878 #undef __FUNCT__
2879 #define __FUNCT__ "MatMerge_SeqsToMPI"
2880 /*@C
2881       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
2882                  matrices from each processor
2883 
2884     Collective on MPI_Comm
2885 
2886    Input Parameters:
2887 +    comm - the communicators the parallel matrix will live on
2888 .    seqmat - the input sequential matrices
2889 .    m - number of local rows (or PETSC_DECIDE)
2890 .    n - number of local columns (or PETSC_DECIDE)
2891 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2892 
2893    Output Parameter:
2894 .    mpimat - the parallel matrix generated
2895 
2896     Level: advanced
2897 
2898    Notes:
2899      The dimensions of the sequential matrix in each processor MUST be the same.
2900      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
2901      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2902 @*/
2903 static PetscEvent logkey_seqstompinum = 0;
2904 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2905 {
2906   PetscErrorCode       ierr;
2907   MPI_Comm             comm=mpimat->comm;
2908   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2909   PetscMPIInt          size,rank,taga,*len_s;
2910   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2911   PetscInt             proc,m;
2912   PetscInt             **buf_ri,**buf_rj;
2913   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2914   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2915   MPI_Request          *s_waits,*r_waits;
2916   MPI_Status           *status;
2917   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2918   Mat_Merge_SeqsToMPI  *merge;
2919   PetscObjectContainer container;
2920 
2921   PetscFunctionBegin;
2922   if (!logkey_seqstompinum) {
2923     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
2924   }
2925   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2926 
2927   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2928   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2929 
2930   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2931   if (container) {
2932     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2933   }
2934   bi     = merge->bi;
2935   bj     = merge->bj;
2936   buf_ri = merge->buf_ri;
2937   buf_rj = merge->buf_rj;
2938 
2939   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2940   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2941   len_s  = merge->len_s;
2942 
2943   /* send and recv matrix values */
2944   /*-----------------------------*/
2945   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2946   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2947 
2948   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2949   for (proc=0,k=0; proc<size; proc++){
2950     if (!len_s[proc]) continue;
2951     i = owners[proc];
2952     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2953     k++;
2954   }
2955 
2956   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2957   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2958   ierr = PetscFree(status);CHKERRQ(ierr);
2959 
2960   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2961   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2962 
2963   /* insert mat values of mpimat */
2964   /*----------------------------*/
2965   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2966   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2967   nextrow = buf_ri_k + merge->nrecv;
2968   nextai  = nextrow + merge->nrecv;
2969 
2970   for (k=0; k<merge->nrecv; k++){
2971     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
2972     nrows = *(buf_ri_k[k]);
2973     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
2974     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
2975   }
2976 
2977   /* set values of ba */
2978   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2979   for (i=0; i<m; i++) {
2980     arow = owners[rank] + i;
2981     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2982     bnzi = bi[i+1] - bi[i];
2983     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2984 
2985     /* add local non-zero vals of this proc's seqmat into ba */
2986     anzi = ai[arow+1] - ai[arow];
2987     aj   = a->j + ai[arow];
2988     aa   = a->a + ai[arow];
2989     nextaj = 0;
2990     for (j=0; nextaj<anzi; j++){
2991       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2992         ba_i[j] += aa[nextaj++];
2993       }
2994     }
2995 
2996     /* add received vals into ba */
2997     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2998       /* i-th row */
2999       if (i == *nextrow[k]) {
3000         anzi = *(nextai[k]+1) - *nextai[k];
3001         aj   = buf_rj[k] + *(nextai[k]);
3002         aa   = abuf_r[k] + *(nextai[k]);
3003         nextaj = 0;
3004         for (j=0; nextaj<anzi; j++){
3005           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3006             ba_i[j] += aa[nextaj++];
3007           }
3008         }
3009         nextrow[k]++; nextai[k]++;
3010       }
3011     }
3012     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3013   }
3014   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3015   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3016 
3017   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3018   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3019   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3020   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3021   PetscFunctionReturn(0);
3022 }
3023 static PetscEvent logkey_seqstompisym = 0;
3024 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3025 {
3026   PetscErrorCode       ierr;
3027   Mat                  B_mpi;
3028   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3029   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3030   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3031   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3032   PetscInt             len,proc,*dnz,*onz;
3033   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3034   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3035   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3036   MPI_Status           *status;
3037   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3038   PetscBT              lnkbt;
3039   Mat_Merge_SeqsToMPI  *merge;
3040   PetscObjectContainer container;
3041 
3042   PetscFunctionBegin;
3043   if (!logkey_seqstompisym) {
3044     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3045   }
3046   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3047 
3048   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3049   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3050 
3051   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3052   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3053 
3054   /* determine row ownership */
3055   /*---------------------------------------------------------*/
3056   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3057   if (m == PETSC_DECIDE) {
3058     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3059   } else {
3060     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3061   }
3062   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3063   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3064   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3065 
3066   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3067   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3068 
3069   /* determine the number of messages to send, their lengths */
3070   /*---------------------------------------------------------*/
3071   len_s  = merge->len_s;
3072 
3073   len = 0;  /* length of buf_si[] */
3074   merge->nsend = 0;
3075   for (proc=0; proc<size; proc++){
3076     len_si[proc] = 0;
3077     if (proc == rank){
3078       len_s[proc] = 0;
3079     } else {
3080       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3081       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3082     }
3083     if (len_s[proc]) {
3084       merge->nsend++;
3085       nrows = 0;
3086       for (i=owners[proc]; i<owners[proc+1]; i++){
3087         if (ai[i+1] > ai[i]) nrows++;
3088       }
3089       len_si[proc] = 2*(nrows+1);
3090       len += len_si[proc];
3091     }
3092   }
3093 
3094   /* determine the number and length of messages to receive for ij-structure */
3095   /*-------------------------------------------------------------------------*/
3096   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3097   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3098 
3099   /* post the Irecv of j-structure */
3100   /*-------------------------------*/
3101   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3102   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3103 
3104   /* post the Isend of j-structure */
3105   /*--------------------------------*/
3106   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3107   sj_waits = si_waits + merge->nsend;
3108 
3109   for (proc=0, k=0; proc<size; proc++){
3110     if (!len_s[proc]) continue;
3111     i = owners[proc];
3112     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3113     k++;
3114   }
3115 
3116   /* receives and sends of j-structure are complete */
3117   /*------------------------------------------------*/
3118   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3119   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3120 
3121   /* send and recv i-structure */
3122   /*---------------------------*/
3123   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3124   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3125 
3126   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3127   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3128   for (proc=0,k=0; proc<size; proc++){
3129     if (!len_s[proc]) continue;
3130     /* form outgoing message for i-structure:
3131          buf_si[0]:                 nrows to be sent
3132                [1:nrows]:           row index (global)
3133                [nrows+1:2*nrows+1]: i-structure index
3134     */
3135     /*-------------------------------------------*/
3136     nrows = len_si[proc]/2 - 1;
3137     buf_si_i    = buf_si + nrows+1;
3138     buf_si[0]   = nrows;
3139     buf_si_i[0] = 0;
3140     nrows = 0;
3141     for (i=owners[proc]; i<owners[proc+1]; i++){
3142       anzi = ai[i+1] - ai[i];
3143       if (anzi) {
3144         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3145         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3146         nrows++;
3147       }
3148     }
3149     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3150     k++;
3151     buf_si += len_si[proc];
3152   }
3153 
3154   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3155   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3156 
3157   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3158   for (i=0; i<merge->nrecv; i++){
3159     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3160   }
3161 
3162   ierr = PetscFree(len_si);CHKERRQ(ierr);
3163   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3164   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3165   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3166   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3167   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3168   ierr = PetscFree(status);CHKERRQ(ierr);
3169 
3170   /* compute a local seq matrix in each processor */
3171   /*----------------------------------------------*/
3172   /* allocate bi array and free space for accumulating nonzero column info */
3173   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3174   bi[0] = 0;
3175 
3176   /* create and initialize a linked list */
3177   nlnk = N+1;
3178   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3179 
3180   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3181   len = 0;
3182   len  = ai[owners[rank+1]] - ai[owners[rank]];
3183   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3184   current_space = free_space;
3185 
3186   /* determine symbolic info for each local row */
3187   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3188   nextrow = buf_ri_k + merge->nrecv;
3189   nextai  = nextrow + merge->nrecv;
3190   for (k=0; k<merge->nrecv; k++){
3191     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3192     nrows = *buf_ri_k[k];
3193     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3194     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3195   }
3196 
3197   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3198   len = 0;
3199   for (i=0;i<m;i++) {
3200     bnzi   = 0;
3201     /* add local non-zero cols of this proc's seqmat into lnk */
3202     arow   = owners[rank] + i;
3203     anzi   = ai[arow+1] - ai[arow];
3204     aj     = a->j + ai[arow];
3205     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3206     bnzi += nlnk;
3207     /* add received col data into lnk */
3208     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3209       if (i == *nextrow[k]) { /* i-th row */
3210         anzi = *(nextai[k]+1) - *nextai[k];
3211         aj   = buf_rj[k] + *nextai[k];
3212         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3213         bnzi += nlnk;
3214         nextrow[k]++; nextai[k]++;
3215       }
3216     }
3217     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3218 
3219     /* if free space is not available, make more free space */
3220     if (current_space->local_remaining<bnzi) {
3221       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3222       nspacedouble++;
3223     }
3224     /* copy data into free space, then initialize lnk */
3225     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3226     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3227 
3228     current_space->array           += bnzi;
3229     current_space->local_used      += bnzi;
3230     current_space->local_remaining -= bnzi;
3231 
3232     bi[i+1] = bi[i] + bnzi;
3233   }
3234 
3235   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3236 
3237   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3238   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3239   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3240 
3241   /* create symbolic parallel matrix B_mpi */
3242   /*---------------------------------------*/
3243   if (n==PETSC_DECIDE) {
3244     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3245   } else {
3246     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3247   }
3248   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3249   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3250   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3251 
3252   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3253   B_mpi->assembled     = PETSC_FALSE;
3254   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3255   merge->bi            = bi;
3256   merge->bj            = bj;
3257   merge->buf_ri        = buf_ri;
3258   merge->buf_rj        = buf_rj;
3259   merge->coi           = PETSC_NULL;
3260   merge->coj           = PETSC_NULL;
3261   merge->owners_co     = PETSC_NULL;
3262 
3263   /* attach the supporting struct to B_mpi for reuse */
3264   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3265   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3266   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3267   *mpimat = B_mpi;
3268   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3269   PetscFunctionReturn(0);
3270 }
3271 
3272 static PetscEvent logkey_seqstompi = 0;
3273 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3274 {
3275   PetscErrorCode   ierr;
3276 
3277   PetscFunctionBegin;
3278   if (!logkey_seqstompi) {
3279     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3280   }
3281   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3282   if (scall == MAT_INITIAL_MATRIX){
3283     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3284   }
3285   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3286   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3287   PetscFunctionReturn(0);
3288 }
3289 static PetscEvent logkey_getlocalmat = 0;
3290 #undef __FUNCT__
3291 #define __FUNCT__ "MatGetLocalMat"
3292 /*@C
3293      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3294 
3295     Not Collective
3296 
3297    Input Parameters:
3298 +    A - the matrix
3299 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3300 
3301    Output Parameter:
3302 .    A_loc - the local sequential matrix generated
3303 
3304     Level: developer
3305 
3306 @*/
3307 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3308 {
3309   PetscErrorCode  ierr;
3310   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3311   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3312   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3313   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3314   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3315   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3316 
3317   PetscFunctionBegin;
3318   if (!logkey_getlocalmat) {
3319     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3320   }
3321   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3322   if (scall == MAT_INITIAL_MATRIX){
3323     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3324     ci[0] = 0;
3325     for (i=0; i<am; i++){
3326       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3327     }
3328     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3329     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3330     k = 0;
3331     for (i=0; i<am; i++) {
3332       ncols_o = bi[i+1] - bi[i];
3333       ncols_d = ai[i+1] - ai[i];
3334       /* off-diagonal portion of A */
3335       for (jo=0; jo<ncols_o; jo++) {
3336         col = cmap[*bj];
3337         if (col >= cstart) break;
3338         cj[k]   = col; bj++;
3339         ca[k++] = *ba++;
3340       }
3341       /* diagonal portion of A */
3342       for (j=0; j<ncols_d; j++) {
3343         cj[k]   = cstart + *aj++;
3344         ca[k++] = *aa++;
3345       }
3346       /* off-diagonal portion of A */
3347       for (j=jo; j<ncols_o; j++) {
3348         cj[k]   = cmap[*bj++];
3349         ca[k++] = *ba++;
3350       }
3351     }
3352     /* put together the new matrix */
3353     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3354     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3355     /* Since these are PETSc arrays, change flags to free them as necessary. */
3356     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3357     mat->freedata = PETSC_TRUE;
3358     mat->nonew    = 0;
3359   } else if (scall == MAT_REUSE_MATRIX){
3360     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3361     ci = mat->i; cj = mat->j; ca = mat->a;
3362     for (i=0; i<am; i++) {
3363       /* off-diagonal portion of A */
3364       ncols_o = bi[i+1] - bi[i];
3365       for (jo=0; jo<ncols_o; jo++) {
3366         col = cmap[*bj];
3367         if (col >= cstart) break;
3368         *ca++ = *ba++; bj++;
3369       }
3370       /* diagonal portion of A */
3371       ncols_d = ai[i+1] - ai[i];
3372       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3373       /* off-diagonal portion of A */
3374       for (j=jo; j<ncols_o; j++) {
3375         *ca++ = *ba++; bj++;
3376       }
3377     }
3378   } else {
3379     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3380   }
3381 
3382   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3383   PetscFunctionReturn(0);
3384 }
3385 
3386 static PetscEvent logkey_getlocalmatcondensed = 0;
3387 #undef __FUNCT__
3388 #define __FUNCT__ "MatGetLocalMatCondensed"
3389 /*@C
3390      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
3391 
3392     Not Collective
3393 
3394    Input Parameters:
3395 +    A - the matrix
3396 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3397 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3398 
3399    Output Parameter:
3400 .    A_loc - the local sequential matrix generated
3401 
3402     Level: developer
3403 
3404 @*/
3405 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3406 {
3407   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3408   PetscErrorCode    ierr;
3409   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3410   IS                isrowa,iscola;
3411   Mat               *aloc;
3412 
3413   PetscFunctionBegin;
3414   if (!logkey_getlocalmatcondensed) {
3415     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3416   }
3417   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3418   if (!row){
3419     start = a->rstart; end = a->rend;
3420     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3421   } else {
3422     isrowa = *row;
3423   }
3424   if (!col){
3425     start = a->cstart;
3426     cmap  = a->garray;
3427     nzA   = a->A->n;
3428     nzB   = a->B->n;
3429     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3430     ncols = 0;
3431     for (i=0; i<nzB; i++) {
3432       if (cmap[i] < start) idx[ncols++] = cmap[i];
3433       else break;
3434     }
3435     imark = i;
3436     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3437     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3438     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3439     ierr = PetscFree(idx);CHKERRQ(ierr);
3440   } else {
3441     iscola = *col;
3442   }
3443   if (scall != MAT_INITIAL_MATRIX){
3444     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3445     aloc[0] = *A_loc;
3446   }
3447   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3448   *A_loc = aloc[0];
3449   ierr = PetscFree(aloc);CHKERRQ(ierr);
3450   if (!row){
3451     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3452   }
3453   if (!col){
3454     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3455   }
3456   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3457   PetscFunctionReturn(0);
3458 }
3459 
3460 static PetscEvent logkey_GetBrowsOfAcols = 0;
3461 #undef __FUNCT__
3462 #define __FUNCT__ "MatGetBrowsOfAcols"
3463 /*@C
3464     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
3465 
3466     Collective on Mat
3467 
3468    Input Parameters:
3469 +    A,B - the matrices in mpiaij format
3470 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3471 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3472 
3473    Output Parameter:
3474 +    rowb, colb - index sets of rows and columns of B to extract
3475 .    brstart - row index of B_seq from which next B->m rows are taken from B's local rows
3476 -    B_seq - the sequential matrix generated
3477 
3478     Level: developer
3479 
3480 @*/
3481 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3482 {
3483   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3484   PetscErrorCode    ierr;
3485   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3486   IS                isrowb,iscolb;
3487   Mat               *bseq;
3488 
3489   PetscFunctionBegin;
3490   if (a->cstart != b->rstart || a->cend != b->rend){
3491     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3492   }
3493   if (!logkey_GetBrowsOfAcols) {
3494     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3495   }
3496   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3497 
3498   if (scall == MAT_INITIAL_MATRIX){
3499     start = a->cstart;
3500     cmap  = a->garray;
3501     nzA   = a->A->n;
3502     nzB   = a->B->n;
3503     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3504     ncols = 0;
3505     for (i=0; i<nzB; i++) {  /* row < local row index */
3506       if (cmap[i] < start) idx[ncols++] = cmap[i];
3507       else break;
3508     }
3509     imark = i;
3510     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3511     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3512     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3513     ierr = PetscFree(idx);CHKERRQ(ierr);
3514     *brstart = imark;
3515     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3516   } else {
3517     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3518     isrowb = *rowb; iscolb = *colb;
3519     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3520     bseq[0] = *B_seq;
3521   }
3522   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3523   *B_seq = bseq[0];
3524   ierr = PetscFree(bseq);CHKERRQ(ierr);
3525   if (!rowb){
3526     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3527   } else {
3528     *rowb = isrowb;
3529   }
3530   if (!colb){
3531     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3532   } else {
3533     *colb = iscolb;
3534   }
3535   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3536   PetscFunctionReturn(0);
3537 }
3538 
3539 static PetscEvent logkey_GetBrowsOfAocols = 0;
3540 #undef __FUNCT__
3541 #define __FUNCT__ "MatGetBrowsOfAoCols"
3542 /*@C
3543     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
3544     of the OFF-DIAGONAL portion of local A
3545 
3546     Collective on Mat
3547 
3548    Input Parameters:
3549 +    A,B - the matrices in mpiaij format
3550 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3551 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3552 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3553 
3554    Output Parameter:
3555 +    B_oth - the sequential matrix generated
3556 
3557     Level: developer
3558 
3559 @*/
3560 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3561 {
3562   VecScatter_MPI_General *gen_to,*gen_from;
3563   PetscErrorCode         ierr;
3564   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3565   Mat_SeqAIJ             *b_oth;
3566   VecScatter             ctx=a->Mvctx;
3567   MPI_Comm               comm=ctx->comm;
3568   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3569   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3570   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3571   PetscInt               i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3572   MPI_Request            *rwaits,*swaits;
3573   MPI_Status             *sstatus,rstatus;
3574   PetscInt               *cols;
3575   PetscScalar            *vals;
3576   PetscMPIInt            j;
3577 
3578   PetscFunctionBegin;
3579   if (a->cstart != b->rstart || a->cend != b->rend){
3580     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3581   }
3582   if (!logkey_GetBrowsOfAocols) {
3583     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3584   }
3585   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3586   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3587 
3588   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3589   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3590   rvalues  = gen_from->values; /* holds the length of sending row */
3591   svalues  = gen_to->values;   /* holds the length of receiving row */
3592   nrecvs   = gen_from->n;
3593   nsends   = gen_to->n;
3594   rwaits   = gen_from->requests;
3595   swaits   = gen_to->requests;
3596   rrow     = gen_from->indices; /* local row index to be received */
3597   srow     = gen_to->indices;   /* local row index to be sent */
3598   rstarts  = gen_from->starts;
3599   sstarts  = gen_to->starts;
3600   rprocs   = gen_from->procs;
3601   sprocs   = gen_to->procs;
3602   sstatus  = gen_to->sstatus;
3603 
3604   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3605   if (scall == MAT_INITIAL_MATRIX){
3606     /* i-array */
3607     /*---------*/
3608     /*  post receives */
3609     for (i=0; i<nrecvs; i++){
3610       rowlen = (PetscInt*)rvalues + rstarts[i];
3611       nrows = rstarts[i+1]-rstarts[i];
3612       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3613     }
3614 
3615     /* pack the outgoing message */
3616     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3617     rstartsj = sstartsj + nsends +1;
3618     sstartsj[0] = 0;  rstartsj[0] = 0;
3619     len = 0; /* total length of j or a array to be sent */
3620     k = 0;
3621     for (i=0; i<nsends; i++){
3622       rowlen = (PetscInt*)svalues + sstarts[i];
3623       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3624       for (j=0; j<nrows; j++) {
3625         row = srow[k] + b->rowners[rank]; /* global row idx */
3626         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3627         len += rowlen[j];
3628         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3629         k++;
3630       }
3631       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3632        sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3633     }
3634     /* recvs and sends of i-array are completed */
3635     i = nrecvs;
3636     while (i--) {
3637       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3638     }
3639     if (nsends) {
3640       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3641     }
3642     /* allocate buffers for sending j and a arrays */
3643     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3644     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3645 
3646     /* create i-array of B_oth */
3647     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3648     b_othi[0] = 0;
3649     len = 0; /* total length of j or a array to be received */
3650     k = 0;
3651     for (i=0; i<nrecvs; i++){
3652       rowlen = (PetscInt*)rvalues + rstarts[i];
3653       nrows = rstarts[i+1]-rstarts[i];
3654       for (j=0; j<nrows; j++) {
3655         b_othi[k+1] = b_othi[k] + rowlen[j];
3656         len += rowlen[j]; k++;
3657       }
3658       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3659     }
3660 
3661     /* allocate space for j and a arrrays of B_oth */
3662     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3663     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3664 
3665     /* j-array */
3666     /*---------*/
3667     /*  post receives of j-array */
3668     for (i=0; i<nrecvs; i++){
3669       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3670       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3671     }
3672     k = 0;
3673     for (i=0; i<nsends; i++){
3674       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3675       bufJ = bufj+sstartsj[i];
3676       for (j=0; j<nrows; j++) {
3677         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3678         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3679         for (l=0; l<ncols; l++){
3680           *bufJ++ = cols[l];
3681         }
3682         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3683       }
3684       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3685     }
3686 
3687     /* recvs and sends of j-array are completed */
3688     i = nrecvs;
3689     while (i--) {
3690       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3691     }
3692     if (nsends) {
3693       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3694     }
3695   } else if (scall == MAT_REUSE_MATRIX){
3696     sstartsj = *startsj;
3697     rstartsj = sstartsj + nsends +1;
3698     bufa     = *bufa_ptr;
3699     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3700     b_otha   = b_oth->a;
3701   } else {
3702     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
3703   }
3704 
3705   /* a-array */
3706   /*---------*/
3707   /*  post receives of a-array */
3708   for (i=0; i<nrecvs; i++){
3709     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3710     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3711   }
3712   k = 0;
3713   for (i=0; i<nsends; i++){
3714     nrows = sstarts[i+1]-sstarts[i];
3715     bufA = bufa+sstartsj[i];
3716     for (j=0; j<nrows; j++) {
3717       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3718       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3719       for (l=0; l<ncols; l++){
3720         *bufA++ = vals[l];
3721       }
3722       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3723 
3724     }
3725     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3726   }
3727   /* recvs and sends of a-array are completed */
3728   i = nrecvs;
3729   while (i--) {
3730     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3731   }
3732    if (nsends) {
3733     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3734   }
3735 
3736   if (scall == MAT_INITIAL_MATRIX){
3737     /* put together the new matrix */
3738     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3739 
3740     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3741     /* Since these are PETSc arrays, change flags to free them as necessary. */
3742     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3743     b_oth->freedata = PETSC_TRUE;
3744     b_oth->nonew    = 0;
3745 
3746     ierr = PetscFree(bufj);CHKERRQ(ierr);
3747     if (!startsj || !bufa_ptr){
3748       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3749       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3750     } else {
3751       *startsj  = sstartsj;
3752       *bufa_ptr = bufa;
3753     }
3754   }
3755   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3756 
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 /*MC
3761    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3762 
3763    Options Database Keys:
3764 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3765 
3766   Level: beginner
3767 
3768 .seealso: MatCreateMPIAIJ
3769 M*/
3770 
3771 EXTERN_C_BEGIN
3772 #undef __FUNCT__
3773 #define __FUNCT__ "MatCreate_MPIAIJ"
3774 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3775 {
3776   Mat_MPIAIJ     *b;
3777   PetscErrorCode ierr;
3778   PetscInt       i;
3779   PetscMPIInt    size;
3780 
3781   PetscFunctionBegin;
3782   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3783 
3784   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3785   B->data         = (void*)b;
3786   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3787   B->factor       = 0;
3788   B->bs           = 1;
3789   B->assembled    = PETSC_FALSE;
3790   B->mapping      = 0;
3791 
3792   B->insertmode      = NOT_SET_VALUES;
3793   b->size            = size;
3794   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3795 
3796   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3797   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3798 
3799   /* the information in the maps duplicates the information computed below, eventually
3800      we should remove the duplicate information that is not contained in the maps */
3801   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3802   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3803 
3804   /* build local table of row and column ownerships */
3805   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3806   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3807   b->cowners = b->rowners + b->size + 2;
3808   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3809   b->rowners[0] = 0;
3810   for (i=2; i<=b->size; i++) {
3811     b->rowners[i] += b->rowners[i-1];
3812   }
3813   b->rstart = b->rowners[b->rank];
3814   b->rend   = b->rowners[b->rank+1];
3815   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3816   b->cowners[0] = 0;
3817   for (i=2; i<=b->size; i++) {
3818     b->cowners[i] += b->cowners[i-1];
3819   }
3820   b->cstart = b->cowners[b->rank];
3821   b->cend   = b->cowners[b->rank+1];
3822 
3823   /* build cache for off array entries formed */
3824   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3825   b->donotstash  = PETSC_FALSE;
3826   b->colmap      = 0;
3827   b->garray      = 0;
3828   b->roworiented = PETSC_TRUE;
3829 
3830   /* stuff used for matrix vector multiply */
3831   b->lvec      = PETSC_NULL;
3832   b->Mvctx     = PETSC_NULL;
3833 
3834   /* stuff for MatGetRow() */
3835   b->rowindices   = 0;
3836   b->rowvalues    = 0;
3837   b->getrowactive = PETSC_FALSE;
3838 
3839   /* Explicitly create 2 MATSEQAIJ matrices. */
3840   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3841   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3842   PetscLogObjectParent(B,b->A);
3843   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3844   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3845   PetscLogObjectParent(B,b->B);
3846 
3847   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3848                                      "MatStoreValues_MPIAIJ",
3849                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3850   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3851                                      "MatRetrieveValues_MPIAIJ",
3852                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3853   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3854 				     "MatGetDiagonalBlock_MPIAIJ",
3855                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3856   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3857 				     "MatIsTranspose_MPIAIJ",
3858 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3859   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3860 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3861 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3862   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3863 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3864 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3865   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3866 				     "MatDiagonalScaleLocal_MPIAIJ",
3867 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3868   PetscFunctionReturn(0);
3869 }
3870 EXTERN_C_END
3871