xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 73e7a55880f7211d317aabb87c8bd5e70ea1847f)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9 a slightly higher hash table cost; without it, it is not scalable (each processor
10 has an order N integer array), but access is fast.
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
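/*
   Illustrative sketch: once the colmap exists, a global column number gcol that
   belongs to the off-diagonal block can be translated to its local index roughly
   as follows (both storages keep index+1 so that 0 can mean "not present"; gcol
   and lcol are placeholder names):

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   A negative lcol means gcol is not a column of the off-diagonal block.
*/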
34 
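/*
   The two macros below are hand-inlined versions of MatSetValues_SeqAIJ() for the
   diagonal (A) and off-diagonal (B) sequential blocks.  Each locates the column in
   the row with a short binary search followed by a linear scan; an existing entry
   is overwritten or added to (depending on addv), otherwise the value is inserted,
   and when the preallocated space for the row is exhausted the storage is enlarged
   by CHUNKSIZE entries.
*/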
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (PetscInt*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (PetscInt*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar    value;
190   PetscErrorCode ierr;
191   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
192   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth     roworiented = aij->roworiented;
194 
195   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
196   Mat            A = aij->A;
197   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
198   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar    *aa = a->a;
200   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat            B = aij->B;
202   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
203   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar    *ba = b->a;
205 
206   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   PetscInt       nonew = a->nonew,shift=0;
208   PetscScalar    *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236             col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
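/*
   Caller-side sketch (illustrative only): values destined for rows this process
   does not own are kept in the stash above and reach the owning process only
   during assembly, so the typical usage pattern is

     PetscInt    row = ...,col = ...;
     PetscScalar val = ...;
     ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ...
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/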
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
278   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   PetscInt       nstash,reallocs;
322   InsertMode     addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 #undef __FUNCT__
343 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
344 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
345 {
346   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
347   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
348   PetscErrorCode ierr;
349   PetscMPIInt    n;
350   PetscInt       i,j,rstart,ncols,flg;
351   PetscInt       *row,*col,other_disassembled;
352   PetscScalar    *val;
353   InsertMode     addv = mat->insertmode;
354 
355   PetscFunctionBegin;
356   if (!aij->donotstash) {
357     while (1) {
358       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
359       if (!flg) break;
360 
361       for (i=0; i<n;) {
362         /* Now identify the consecutive vals belonging to the same row */
363         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
364         if (j < n) ncols = j-i;
365         else       ncols = n-i;
366         /* Now assemble all these values with a single function call */
367         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
368         i = j;
369       }
370     }
371     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
372   }
373 
374   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
375   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
376 
377   /* determine if any processor has disassembled; if so we must
378      also disassemble ourselves, in order that we may reassemble. */
379   /*
380      if the nonzero structure of submatrix B cannot change then we know that
381      no processor disassembled, thus we can skip this step
382   */
383   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
384     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
385     if (mat->was_assembled && !other_disassembled) {
386       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
387       /* reaccess the b because aij->B was changed */
388       b    = (Mat_SeqAIJ *)aij->B->data;
389     }
390   }
391 
392   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
393     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
394   }
395 
396   b->inode.use         = PETSC_FALSE;
397   b->compressedrow.use = PETSC_TRUE;
398   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
399   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
400 
401   if (aij->rowvalues) {
402     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
403     aij->rowvalues = 0;
404   }
405 
406   /* used by MatAXPY() */
407   a->xtoy = 0; b->xtoy = 0;
408   a->XtoY = 0; b->XtoY = 0;
409 
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
415 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
416 {
417   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
418   PetscErrorCode ierr;
419 
420   PetscFunctionBegin;
421   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
422   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
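/*
   The rows to be zeroed may be specified by any process, so MatZeroRows_MPIAIJ
   first determines the owner of every requested row, ships the row numbers to the
   owning processes with nonblocking sends and receives, and then each process
   zeroes (and optionally sets the diagonal of) the rows it received.
*/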
426 #undef __FUNCT__
427 #define __FUNCT__ "MatZeroRows_MPIAIJ"
428 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
429 {
430   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
431   PetscErrorCode ierr;
432   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
433   PetscInt       i,N,*rows,*owners = l->rowners;
434   PetscInt       *nprocs,j,idx,nsends,row;
435   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
436   PetscInt       *rvalues,count,base,slen,*source;
437   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
438   MPI_Comm       comm = A->comm;
439   MPI_Request    *send_waits,*recv_waits;
440   MPI_Status     recv_status,*send_status;
441   IS             istmp;
442   PetscTruth     found;
443 
444   PetscFunctionBegin;
445   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
446   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
447 
448   /*  first count number of contributors to each processor */
449   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
450   ierr   = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
451   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
452   for (i=0; i<N; i++) {
453     idx = rows[i];
454     found = PETSC_FALSE;
455     for (j=0; j<size; j++) {
456       if (idx >= owners[j] && idx < owners[j+1]) {
457         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
458       }
459     }
460     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
461   }
462   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
463 
464   /* inform other processors of number of messages and max length*/
465   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
466 
467   /* post receives:   */
468   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
469   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
470   for (i=0; i<nrecvs; i++) {
471     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
472   }
473 
474   /* do sends:
475       1) starts[i] gives the starting index in svalues for stuff going to
476          the ith processor
477   */
478   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
479   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
480   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
481   starts[0] = 0;
482   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
483   for (i=0; i<N; i++) {
484     svalues[starts[owner[i]]++] = rows[i];
485   }
486   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
487 
488   starts[0] = 0;
489   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
490   count = 0;
491   for (i=0; i<size; i++) {
492     if (nprocs[2*i+1]) {
493       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
494     }
495   }
496   ierr = PetscFree(starts);CHKERRQ(ierr);
497 
498   base = owners[rank];
499 
500   /*  wait on receives */
501   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
502   source = lens + nrecvs;
503   count  = nrecvs; slen = 0;
504   while (count) {
505     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
506     /* unpack receives into our local space */
507     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
508     source[imdex]  = recv_status.MPI_SOURCE;
509     lens[imdex]    = n;
510     slen          += n;
511     count--;
512   }
513   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
514 
515   /* move the data into the send scatter */
516   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
517   count = 0;
518   for (i=0; i<nrecvs; i++) {
519     values = rvalues + i*nmax;
520     for (j=0; j<lens[i]; j++) {
521       lrows[count++] = values[j] - base;
522     }
523   }
524   ierr = PetscFree(rvalues);CHKERRQ(ierr);
525   ierr = PetscFree(lens);CHKERRQ(ierr);
526   ierr = PetscFree(owner);CHKERRQ(ierr);
527   ierr = PetscFree(nprocs);CHKERRQ(ierr);
528 
529   /* actually zap the local rows */
530   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
531   PetscLogObjectParent(A,istmp);
532 
533   /*
534         Zero the required rows. If the "diagonal block" of the matrix
535      is square and the user wishes to set the diagonal we use separate
536      code so that MatSetValues() is not called for each diagonal entry, which would
537      allocate new memory with many mallocs and slow things down.
538 
539        Contributed by: Mathew Knepley
540   */
541   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
542   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
543   if (diag && (l->A->M == l->A->N)) {
544     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
545   } else if (diag) {
546     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
547     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
548       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
549 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
550     }
551     for (i = 0; i < slen; i++) {
552       row  = lrows[i] + rstart;
553       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
554     }
555     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
556     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
557   } else {
558     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
559   }
560   ierr = ISDestroy(istmp);CHKERRQ(ierr);
561   ierr = PetscFree(lrows);CHKERRQ(ierr);
562 
563   /* wait on sends */
564   if (nsends) {
565     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
566     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
567     ierr = PetscFree(send_status);CHKERRQ(ierr);
568   }
569   ierr = PetscFree(send_waits);CHKERRQ(ierr);
570   ierr = PetscFree(svalues);CHKERRQ(ierr);
571 
572   PetscFunctionReturn(0);
573 }
574 
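/*
   MatMult_MPIAIJ computes yy = A*xx + B*x_ghost: the scatter that gathers the
   ghost values of xx into a->lvec is started first, the product with the local
   diagonal block A overlaps that communication, and the off-diagonal contribution
   from B is added once the scatter has completed.
*/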
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMult_MPIAIJ"
577 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscInt       nt;
582 
583   PetscFunctionBegin;
584   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
585   if (nt != A->n) {
586     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
587   }
588   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
589   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
590   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
591   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "MatMultAdd_MPIAIJ"
597 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
598 {
599   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
600   PetscErrorCode ierr;
601 
602   PetscFunctionBegin;
603   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
604   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
605   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
606   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
607   PetscFunctionReturn(0);
608 }
609 
610 #undef __FUNCT__
611 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
612 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
613 {
614   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
615   PetscErrorCode ierr;
616 
617   PetscFunctionBegin;
618   /* do nondiagonal part */
619   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
620   /* send it on its way */
621   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
622   /* do local part */
623   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
624   /* receive remote parts: note this assumes the values are not actually */
625   /* inserted in yy until the next line, which is true for this implementation */
626   /* but may not always be true. */
627   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
631 EXTERN_C_BEGIN
632 #undef __FUNCT__
633 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
634 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
635 {
636   MPI_Comm       comm;
637   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
638   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
639   IS             Me,Notme;
640   PetscErrorCode ierr;
641   PetscInt       M,N,first,last,*notme,i;
642   PetscMPIInt    size;
643 
644   PetscFunctionBegin;
645 
646   /* Easy test: symmetric diagonal block */
647   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
648   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
649   if (!*f) PetscFunctionReturn(0);
650   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
651   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
652   if (size == 1) PetscFunctionReturn(0);
653 
654   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
655   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
656   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
657   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
658   for (i=0; i<first; i++) notme[i] = i;
659   for (i=last; i<M; i++) notme[i-last+first] = i;
660   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
661   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
662   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
663   Aoff = Aoffs[0];
664   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
665   Boff = Boffs[0];
666   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
667   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
668   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
669   ierr = ISDestroy(Me);CHKERRQ(ierr);
670   ierr = ISDestroy(Notme);CHKERRQ(ierr);
671 
672   PetscFunctionReturn(0);
673 }
674 EXTERN_C_END
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
678 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
679 {
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681   PetscErrorCode ierr;
682 
683   PetscFunctionBegin;
684   /* do nondiagonal part */
685   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
686   /* send it on its way */
687   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
688   /* do local part */
689   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
690   /* receive remote parts: note this assumes the values are not actually */
691   /* inserted in zz until the next line, which is true for this implementation */
692   /* but may not always be true. */
693   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 /*
698   This only works correctly for square matrices where the subblock A->A is the
699    diagonal block
700 */
701 #undef __FUNCT__
702 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
703 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
704 {
705   PetscErrorCode ierr;
706   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
707 
708   PetscFunctionBegin;
709   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
710   if (a->rstart != a->cstart || a->rend != a->cend) {
711     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
712   }
713   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
714   PetscFunctionReturn(0);
715 }
716 
717 #undef __FUNCT__
718 #define __FUNCT__ "MatScale_MPIAIJ"
719 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
720 {
721   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
722   PetscErrorCode ierr;
723 
724   PetscFunctionBegin;
725   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
726   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatDestroy_MPIAIJ"
732 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
733 {
734   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738 #if defined(PETSC_USE_LOG)
739   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
740 #endif
741   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
742   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
743   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
744   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
745 #if defined (PETSC_USE_CTABLE)
746   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
747 #else
748   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
749 #endif
750   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
751   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
752   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
753   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
754   ierr = PetscFree(aij);CHKERRQ(ierr);
755 
756   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
757   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
758   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
759   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
760   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
761   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
762   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 
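/*
   MatView_MPIAIJ_Binary writes the PETSc binary matrix format: a header with the
   matrix file cookie, the global dimensions and the total number of nonzeros,
   followed by all row lengths, then all global column indices, then all numerical
   values.  Processes 1..size-1 send their pieces to process 0, which performs the
   actual writing.
*/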
766 #undef __FUNCT__
767 #define __FUNCT__ "MatView_MPIAIJ_Binary"
768 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
769 {
770   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
771   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
772   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
773   PetscErrorCode    ierr;
774   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
775   int               fd;
776   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
777   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
778   PetscScalar       *column_values;
779 
780   PetscFunctionBegin;
781   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
782   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
783   nz   = A->nz + B->nz;
784   if (!rank) {
785     header[0] = MAT_FILE_COOKIE;
786     header[1] = mat->M;
787     header[2] = mat->N;
788     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
789     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
790     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
791     /* get largest number of rows any processor has */
792     rlen = mat->m;
793     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
794     for (i=1; i<size; i++) {
795       rlen = PetscMax(rlen,range[i+1] - range[i]);
796     }
797   } else {
798     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
799     rlen = mat->m;
800   }
801 
802   /* load up the local row counts */
803   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
804   for (i=0; i<mat->m; i++) {
805     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
806   }
807 
808   /* store the row lengths to the file */
809   if (!rank) {
810     MPI_Status status;
811     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
812     for (i=1; i<size; i++) {
813       rlen = range[i+1] - range[i];
814       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
815       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
816     }
817   } else {
818     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
819   }
820   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
821 
822   /* load up the local column indices */
823   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
824   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
825   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
826   cnt  = 0;
827   for (i=0; i<mat->m; i++) {
828     for (j=B->i[i]; j<B->i[i+1]; j++) {
829       if ( (col = garray[B->j[j]]) > cstart) break;
830       column_indices[cnt++] = col;
831     }
832     for (k=A->i[i]; k<A->i[i+1]; k++) {
833       column_indices[cnt++] = A->j[k] + cstart;
834     }
835     for (; j<B->i[i+1]; j++) {
836       column_indices[cnt++] = garray[B->j[j]];
837     }
838   }
839   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
840 
841   /* store the column indices to the file */
842   if (!rank) {
843     MPI_Status status;
844     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
845     for (i=1; i<size; i++) {
846       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
847       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
848       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
849       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
850     }
851   } else {
852     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
853     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
854   }
855   ierr = PetscFree(column_indices);CHKERRQ(ierr);
856 
857   /* load up the local column values */
858   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
859   cnt  = 0;
860   for (i=0; i<mat->m; i++) {
861     for (j=B->i[i]; j<B->i[i+1]; j++) {
862       if ( garray[B->j[j]] > cstart) break;
863       column_values[cnt++] = B->a[j];
864     }
865     for (k=A->i[i]; k<A->i[i+1]; k++) {
866       column_values[cnt++] = A->a[k];
867     }
868     for (; j<B->i[i+1]; j++) {
869       column_values[cnt++] = B->a[j];
870     }
871   }
872   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
873 
874   /* store the column values to the file */
875   if (!rank) {
876     MPI_Status status;
877     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
878     for (i=1; i<size; i++) {
879       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
880       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
881       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
882       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
883     }
884   } else {
885     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
886     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
887   }
888   ierr = PetscFree(column_values);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 #undef __FUNCT__
893 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
894 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
895 {
896   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
897   PetscErrorCode    ierr;
898   PetscMPIInt       rank = aij->rank,size = aij->size;
899   PetscTruth        isdraw,iascii,flg,isbinary;
900   PetscViewer       sviewer;
901   PetscViewerFormat format;
902 
903   PetscFunctionBegin;
904   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
905   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
906   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
907   if (iascii) {
908     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
909     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
910       MatInfo info;
911       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
912       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
913       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
914       if (flg) {
915         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
916 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
917       } else {
918         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
919 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
920       }
921       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
922       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
923       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
924       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
925       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
926       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
927       PetscFunctionReturn(0);
928     } else if (format == PETSC_VIEWER_ASCII_INFO) {
929       PetscFunctionReturn(0);
930     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
931       PetscFunctionReturn(0);
932     }
933   } else if (isbinary) {
934     if (size == 1) {
935       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
936       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
937     } else {
938       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
939     }
940     PetscFunctionReturn(0);
941   } else if (isdraw) {
942     PetscDraw  draw;
943     PetscTruth isnull;
944     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
945     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
946   }
947 
948   if (size == 1) {
949     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
950     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
951   } else {
952     /* assemble the entire matrix onto first processor. */
953     Mat         A;
954     Mat_SeqAIJ  *Aloc;
955     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
956     PetscScalar *a;
957 
958     if (!rank) {
959       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
960     } else {
961       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
962     }
963     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
964     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
965     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
966     PetscLogObjectParent(mat,A);
967 
968     /* copy over the A part */
969     Aloc = (Mat_SeqAIJ*)aij->A->data;
970     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
971     row = aij->rstart;
972     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
973     for (i=0; i<m; i++) {
974       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
975       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
976     }
977     aj = Aloc->j;
978     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
979 
980     /* copy over the B part */
981     Aloc = (Mat_SeqAIJ*)aij->B->data;
982     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
983     row  = aij->rstart;
984     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
985     ct   = cols;
986     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
987     for (i=0; i<m; i++) {
988       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
989       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
990     }
991     ierr = PetscFree(ct);CHKERRQ(ierr);
992     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
993     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
994     /*
995        Everyone has to call to draw the matrix since the graphics waits are
996        synchronized across all processors that share the PetscDraw object
997     */
998     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
999     if (!rank) {
1000       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1001       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1002     }
1003     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1004     ierr = MatDestroy(A);CHKERRQ(ierr);
1005   }
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatView_MPIAIJ"
1011 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1012 {
1013   PetscErrorCode ierr;
1014   PetscTruth     iascii,isdraw,issocket,isbinary;
1015 
1016   PetscFunctionBegin;
1017   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1018   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1019   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1020   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1021   if (iascii || isdraw || isbinary || issocket) {
1022     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1023   } else {
1024     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1025   }
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 
1030 
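/*
   MatRelax_MPIAIJ performs only processor-local SOR sweeps: in each outer
   iteration the ghost values of xx are gathered into mat->lvec, the right-hand
   side is updated as bb1 = bb - B*x_ghost, and the sequential relaxation is
   applied to the local diagonal block with bb1.  A true parallel SOR ordering is
   not supported.
*/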
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatRelax_MPIAIJ"
1033 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1034 {
1035   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1036   PetscErrorCode ierr;
1037   Vec            bb1;
1038   PetscScalar    mone=-1.0;
1039 
1040   PetscFunctionBegin;
1041   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1042 
1043   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1044 
1045   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050 
1051     while (its--) {
1052       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1054 
1055       /* update rhs: bb1 = bb - B*x */
1056       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1057       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1058 
1059       /* local sweep */
1060       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1061       CHKERRQ(ierr);
1062     }
1063   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1064     if (flag & SOR_ZERO_INITIAL_GUESS) {
1065       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1066       its--;
1067     }
1068     while (its--) {
1069       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1071 
1072       /* update rhs: bb1 = bb - B*x */
1073       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1074       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1075 
1076       /* local sweep */
1077       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1078       CHKERRQ(ierr);
1079     }
1080   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1081     if (flag & SOR_ZERO_INITIAL_GUESS) {
1082       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1083       its--;
1084     }
1085     while (its--) {
1086       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1087       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1088 
1089       /* update rhs: bb1 = bb - B*x */
1090       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1091       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1092 
1093       /* local sweep */
1094       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1095       CHKERRQ(ierr);
1096     }
1097   } else {
1098     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1099   }
1100 
1101   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1107 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1108 {
1109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1110   Mat            A = mat->A,B = mat->B;
1111   PetscErrorCode ierr;
1112   PetscReal      isend[5],irecv[5];
1113 
1114   PetscFunctionBegin;
1115   info->block_size     = 1.0;
1116   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1117   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1118   isend[3] = info->memory;  isend[4] = info->mallocs;
1119   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1120   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1121   isend[3] += info->memory;  isend[4] += info->mallocs;
1122   if (flag == MAT_LOCAL) {
1123     info->nz_used      = isend[0];
1124     info->nz_allocated = isend[1];
1125     info->nz_unneeded  = isend[2];
1126     info->memory       = isend[3];
1127     info->mallocs      = isend[4];
1128   } else if (flag == MAT_GLOBAL_MAX) {
1129     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1130     info->nz_used      = irecv[0];
1131     info->nz_allocated = irecv[1];
1132     info->nz_unneeded  = irecv[2];
1133     info->memory       = irecv[3];
1134     info->mallocs      = irecv[4];
1135   } else if (flag == MAT_GLOBAL_SUM) {
1136     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1137     info->nz_used      = irecv[0];
1138     info->nz_allocated = irecv[1];
1139     info->nz_unneeded  = irecv[2];
1140     info->memory       = irecv[3];
1141     info->mallocs      = irecv[4];
1142   }
1143   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1144   info->fill_ratio_needed = 0;
1145   info->factor_mallocs    = 0;
1146   info->rows_global       = (double)matin->M;
1147   info->columns_global    = (double)matin->N;
1148   info->rows_local        = (double)matin->m;
1149   info->columns_local     = (double)matin->N;
1150 
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatSetOption_MPIAIJ"
1156 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1157 {
1158   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1159   PetscErrorCode ierr;
1160 
1161   PetscFunctionBegin;
1162   switch (op) {
1163   case MAT_NO_NEW_NONZERO_LOCATIONS:
1164   case MAT_YES_NEW_NONZERO_LOCATIONS:
1165   case MAT_COLUMNS_UNSORTED:
1166   case MAT_COLUMNS_SORTED:
1167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1168   case MAT_KEEP_ZEROED_ROWS:
1169   case MAT_NEW_NONZERO_LOCATION_ERR:
1170   case MAT_USE_INODES:
1171   case MAT_DO_NOT_USE_INODES:
1172   case MAT_IGNORE_ZERO_ENTRIES:
1173     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1174     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1175     break;
1176   case MAT_ROW_ORIENTED:
1177     a->roworiented = PETSC_TRUE;
1178     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1179     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1180     break;
1181   case MAT_ROWS_SORTED:
1182   case MAT_ROWS_UNSORTED:
1183   case MAT_YES_NEW_DIAGONALS:
1184     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1185     break;
1186   case MAT_COLUMN_ORIENTED:
1187     a->roworiented = PETSC_FALSE;
1188     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1189     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1190     break;
1191   case MAT_IGNORE_OFF_PROC_ENTRIES:
1192     a->donotstash = PETSC_TRUE;
1193     break;
1194   case MAT_NO_NEW_DIAGONALS:
1195     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1196   case MAT_SYMMETRIC:
1197   case MAT_STRUCTURALLY_SYMMETRIC:
1198   case MAT_HERMITIAN:
1199   case MAT_SYMMETRY_ETERNAL:
1200     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1201     break;
1202   case MAT_NOT_SYMMETRIC:
1203   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1204   case MAT_NOT_HERMITIAN:
1205   case MAT_NOT_SYMMETRY_ETERNAL:
1206     break;
1207   default:
1208     SETERRQ(PETSC_ERR_SUP,"unknown option");
1209   }
1210   PetscFunctionReturn(0);
1211 }
1212 
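/*
   MatGetRow_MPIAIJ returns a single global row by fetching the corresponding rows
   of the local A and B blocks and merging them into one work array ordered by
   increasing global column number: B entries to the left of the diagonal block,
   then the A entries, then the remaining B entries.
*/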
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatGetRow_MPIAIJ"
1215 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1216 {
1217   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1218   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1219   PetscErrorCode ierr;
1220   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1221   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1222   PetscInt       *cmap,*idx_p;
1223 
1224   PetscFunctionBegin;
1225   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1226   mat->getrowactive = PETSC_TRUE;
1227 
1228   if (!mat->rowvalues && (idx || v)) {
1229     /*
1230         allocate enough space to hold information from the longest row.
1231     */
1232     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1233     PetscInt     max = 1,tmp;
1234     for (i=0; i<matin->m; i++) {
1235       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1236       if (max < tmp) { max = tmp; }
1237     }
1238     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1239     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1240   }
1241 
1242   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1243   lrow = row - rstart;
1244 
1245   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1246   if (!v)   {pvA = 0; pvB = 0;}
1247   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1248   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1249   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1250   nztot = nzA + nzB;
1251 
1252   cmap  = mat->garray;
1253   if (v  || idx) {
1254     if (nztot) {
1255       /* Sort by increasing column numbers, assuming A and B already sorted */
1256       PetscInt imark = -1;
1257       if (v) {
1258         *v = v_p = mat->rowvalues;
1259         for (i=0; i<nzB; i++) {
1260           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1261           else break;
1262         }
1263         imark = i;
1264         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1265         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1266       }
1267       if (idx) {
1268         *idx = idx_p = mat->rowindices;
1269         if (imark > -1) {
1270           for (i=0; i<imark; i++) {
1271             idx_p[i] = cmap[cworkB[i]];
1272           }
1273         } else {
1274           for (i=0; i<nzB; i++) {
1275             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1276             else break;
1277           }
1278           imark = i;
1279         }
1280         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1281         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1282       }
1283     } else {
1284       if (idx) *idx = 0;
1285       if (v)   *v   = 0;
1286     }
1287   }
1288   *nz = nztot;
1289   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1290   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 #undef __FUNCT__
1295 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1296 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1297 {
1298   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1299 
1300   PetscFunctionBegin;
1301   if (aij->getrowactive == PETSC_FALSE) {
1302     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1303   }
1304   aij->getrowactive = PETSC_FALSE;
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatNorm_MPIAIJ"
1310 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1311 {
1312   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1313   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscInt       i,j,cstart = aij->cstart;
1316   PetscReal      sum = 0.0;
1317   PetscScalar    *v;
1318 
1319   PetscFunctionBegin;
1320   if (aij->size == 1) {
1321     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1322   } else {
1323     if (type == NORM_FROBENIUS) {
1324       v = amat->a;
1325       for (i=0; i<amat->nz; i++) {
1326 #if defined(PETSC_USE_COMPLEX)
1327         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1328 #else
1329         sum += (*v)*(*v); v++;
1330 #endif
1331       }
1332       v = bmat->a;
1333       for (i=0; i<bmat->nz; i++) {
1334 #if defined(PETSC_USE_COMPLEX)
1335         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1336 #else
1337         sum += (*v)*(*v); v++;
1338 #endif
1339       }
1340       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1341       *norm = sqrt(*norm);
1342     } else if (type == NORM_1) { /* max column norm */
1343       PetscReal *tmp,*tmp2;
1344       PetscInt    *jj,*garray = aij->garray;
1345       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1346       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1347       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1348       *norm = 0.0;
1349       v = amat->a; jj = amat->j;
1350       for (j=0; j<amat->nz; j++) {
1351         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1352       }
1353       v = bmat->a; jj = bmat->j;
1354       for (j=0; j<bmat->nz; j++) {
1355         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1356       }
1357       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1358       for (j=0; j<mat->N; j++) {
1359         if (tmp2[j] > *norm) *norm = tmp2[j];
1360       }
1361       ierr = PetscFree(tmp);CHKERRQ(ierr);
1362       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1363     } else if (type == NORM_INFINITY) { /* max row norm */
1364       PetscReal ntemp = 0.0;
1365       for (j=0; j<aij->A->m; j++) {
1366         v = amat->a + amat->i[j];
1367         sum = 0.0;
1368         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1369           sum += PetscAbsScalar(*v); v++;
1370         }
1371         v = bmat->a + bmat->i[j];
1372         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1373           sum += PetscAbsScalar(*v); v++;
1374         }
1375         if (sum > ntemp) ntemp = sum;
1376       }
1377       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1378     } else {
1379       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1380     }
1381   }
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatTranspose_MPIAIJ"
1387 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1388 {
1389   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1390   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1391   PetscErrorCode ierr;
1392   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1393   Mat            B;
1394   PetscScalar    *array;
1395 
1396   PetscFunctionBegin;
1397   if (!matout && M != N) {
1398     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1399   }
1400 
1401   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1402   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1403   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1404 
1405   /* copy over the A part */
1406   Aloc = (Mat_SeqAIJ*)a->A->data;
1407   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1408   row = a->rstart;
1409   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1410   for (i=0; i<m; i++) {
1411     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1412     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1413   }
1414   aj = Aloc->j;
1415   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1416 
1417   /* copy over the B part */
1418   Aloc = (Mat_SeqAIJ*)a->B->data;
1419   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1420   row  = a->rstart;
1421   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1422   ct   = cols;
1423   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1424   for (i=0; i<m; i++) {
1425     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1426     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1427   }
1428   ierr = PetscFree(ct);CHKERRQ(ierr);
1429   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1430   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1431   if (matout) {
1432     *matout = B;
1433   } else {
1434     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1435   }
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1441 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1442 {
1443   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1444   Mat            a = aij->A,b = aij->B;
1445   PetscErrorCode ierr;
1446   PetscInt       s1,s2,s3;
1447 
1448   PetscFunctionBegin;
1449   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1450   if (rr) {
1451     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1452     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1453     /* Overlap communication with computation. */
1454     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1455   }
1456   if (ll) {
1457     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1458     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1459     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1460   }
1461   /* scale  the diagonal block */
1462   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1463 
1464   if (rr) {
1465     /* Do a scatter end and then right scale the off-diagonal block */
1466     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1467     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1468   }
1469 
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1476 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1477 {
1478   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1479   PetscErrorCode ierr;
1480 
1481   PetscFunctionBegin;
1482   if (!a->rank) {
1483     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1484   }
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1490 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1491 {
1492   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1493   PetscErrorCode ierr;
1494 
1495   PetscFunctionBegin;
1496   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1497   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1502 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1503 {
1504   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1505   PetscErrorCode ierr;
1506 
1507   PetscFunctionBegin;
1508   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatEqual_MPIAIJ"
1514 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1515 {
1516   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1517   Mat            a,b,c,d;
1518   PetscTruth     flg;
1519   PetscErrorCode ierr;
1520 
1521   PetscFunctionBegin;
1522   a = matA->A; b = matA->B;
1523   c = matB->A; d = matB->B;
1524 
1525   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1526   if (flg == PETSC_TRUE) {
1527     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1528   }
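  /* logical AND reduction: *flag is PETSC_TRUE only if the local blocks match on every process */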
1529   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatCopy_MPIAIJ"
1535 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1536 {
1537   PetscErrorCode ierr;
1538   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1539   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1540 
1541   PetscFunctionBegin;
1542   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1543   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1544     /* because of the column compression in the off-processor part of the matrix a->B,
1545        the number of columns in a->B and b->B may be different, hence we cannot call
1546        the MatCopy() directly on the two parts. If need be, we can provide a more
1547        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1548        then copying the submatrices */
1549     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1550   } else {
1551     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1552     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1553   }
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1559 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1560 {
1561   PetscErrorCode ierr;
1562 
1563   PetscFunctionBegin;
1564   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #include "petscblaslapack.h"
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatAXPY_MPIAIJ"
1571 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1572 {
1573   PetscErrorCode ierr;
1574   PetscInt       i;
1575   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1576   PetscBLASInt   bnz,one=1;
1577   Mat_SeqAIJ     *x,*y;
1578 
1579   PetscFunctionBegin;
1580   if (str == SAME_NONZERO_PATTERN) {
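    /* identical nonzero patterns: the diagonal and off-diagonal blocks share the same CSR
       structure, so Y += a*X reduces to a BLAS axpy on the stored value arrays */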
1581     x = (Mat_SeqAIJ *)xx->A->data;
1582     y = (Mat_SeqAIJ *)yy->A->data;
1583     bnz = (PetscBLASInt)x->nz;
1584     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1585     x = (Mat_SeqAIJ *)xx->B->data;
1586     y = (Mat_SeqAIJ *)yy->B->data;
1587     bnz = (PetscBLASInt)x->nz;
1588     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1589   } else if (str == SUBSET_NONZERO_PATTERN) {
1590     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1591 
1592     x = (Mat_SeqAIJ *)xx->B->data;
1593     y = (Mat_SeqAIJ *)yy->B->data;
1594     if (y->xtoy && y->XtoY != xx->B) {
1595       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1596       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1597     }
1598     if (!y->xtoy) { /* get xtoy */
1599       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1600       y->XtoY = xx->B;
1601     }
1602     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1603   } else {
1604     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1605   }
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 /* -------------------------------------------------------------------*/
1610 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1611        MatGetRow_MPIAIJ,
1612        MatRestoreRow_MPIAIJ,
1613        MatMult_MPIAIJ,
1614 /* 4*/ MatMultAdd_MPIAIJ,
1615        MatMultTranspose_MPIAIJ,
1616        MatMultTransposeAdd_MPIAIJ,
1617        0,
1618        0,
1619        0,
1620 /*10*/ 0,
1621        0,
1622        0,
1623        MatRelax_MPIAIJ,
1624        MatTranspose_MPIAIJ,
1625 /*15*/ MatGetInfo_MPIAIJ,
1626        MatEqual_MPIAIJ,
1627        MatGetDiagonal_MPIAIJ,
1628        MatDiagonalScale_MPIAIJ,
1629        MatNorm_MPIAIJ,
1630 /*20*/ MatAssemblyBegin_MPIAIJ,
1631        MatAssemblyEnd_MPIAIJ,
1632        0,
1633        MatSetOption_MPIAIJ,
1634        MatZeroEntries_MPIAIJ,
1635 /*25*/ MatZeroRows_MPIAIJ,
1636        0,
1637        0,
1638        0,
1639        0,
1640 /*30*/ MatSetUpPreallocation_MPIAIJ,
1641        0,
1642        0,
1643        0,
1644        0,
1645 /*35*/ MatDuplicate_MPIAIJ,
1646        0,
1647        0,
1648        0,
1649        0,
1650 /*40*/ MatAXPY_MPIAIJ,
1651        MatGetSubMatrices_MPIAIJ,
1652        MatIncreaseOverlap_MPIAIJ,
1653        MatGetValues_MPIAIJ,
1654        MatCopy_MPIAIJ,
1655 /*45*/ MatPrintHelp_MPIAIJ,
1656        MatScale_MPIAIJ,
1657        0,
1658        0,
1659        0,
1660 /*50*/ MatSetBlockSize_MPIAIJ,
1661        0,
1662        0,
1663        0,
1664        0,
1665 /*55*/ MatFDColoringCreate_MPIAIJ,
1666        0,
1667        MatSetUnfactored_MPIAIJ,
1668        0,
1669        0,
1670 /*60*/ MatGetSubMatrix_MPIAIJ,
1671        MatDestroy_MPIAIJ,
1672        MatView_MPIAIJ,
1673        MatGetPetscMaps_Petsc,
1674        0,
1675 /*65*/ 0,
1676        0,
1677        0,
1678        0,
1679        0,
1680 /*70*/ 0,
1681        0,
1682        MatSetColoring_MPIAIJ,
1683        MatSetValuesAdic_MPIAIJ,
1684        MatSetValuesAdifor_MPIAIJ,
1685 /*75*/ 0,
1686        0,
1687        0,
1688        0,
1689        0,
1690 /*80*/ 0,
1691        0,
1692        0,
1693        0,
1694 /*84*/ MatLoad_MPIAIJ,
1695        0,
1696        0,
1697        0,
1698        0,
1699        0,
1700 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1701        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1702        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1703        MatPtAP_MPIAIJ_MPIAIJ,
1704        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1705 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1706        0,
1707        0,
1708        0};
1709 
1710 /* ----------------------------------------------------------------------------------------*/
1711 
1712 EXTERN_C_BEGIN
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1715 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1716 {
1717   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1718   PetscErrorCode ierr;
1719 
1720   PetscFunctionBegin;
1721   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1722   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
1725 EXTERN_C_END
1726 
1727 EXTERN_C_BEGIN
1728 #undef __FUNCT__
1729 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1730 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1731 {
1732   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1733   PetscErrorCode ierr;
1734 
1735   PetscFunctionBegin;
1736   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1737   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1738   PetscFunctionReturn(0);
1739 }
1740 EXTERN_C_END
1741 
1742 #include "petscpc.h"
1743 EXTERN_C_BEGIN
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1746 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1747 {
1748   Mat_MPIAIJ     *b;
1749   PetscErrorCode ierr;
1750   PetscInt       i;
1751 
1752   PetscFunctionBegin;
1753   B->preallocated = PETSC_TRUE;
1754   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1755   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1756   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1757   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1758   if (d_nnz) {
1759     for (i=0; i<B->m; i++) {
1760       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1761     }
1762   }
1763   if (o_nnz) {
1764     for (i=0; i<B->m; i++) {
1765       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1766     }
1767   }
1768   b = (Mat_MPIAIJ*)B->data;
1769   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1770   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1771 
1772   PetscFunctionReturn(0);
1773 }
1774 EXTERN_C_END
1775 
1776 /*MC
1777    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1778 
1779    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1780    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1781   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1782    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
1783    for communicators controlling multiple processes.  It is recommended that you call both of
1784    the above preallocation routines for simplicity.
1785    Options Database Keys:
1786 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1787 
1788   Level: beginner
1789 
1790 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1791 M*/
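/*
   A minimal usage sketch for the MATAIJ type (illustrative only; the communicator comm and the
   sizes m,n,M,N used below are assumptions, not defined in this file).  Calling both
   preallocation routines is harmless since only the call matching the actual type takes effect:

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/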
1792 
1793 EXTERN_C_BEGIN
1794 #undef __FUNCT__
1795 #define __FUNCT__ "MatCreate_AIJ"
1796 PetscErrorCode MatCreate_AIJ(Mat A)
1797 {
1798   PetscErrorCode ierr;
1799   PetscMPIInt    size;
1800 
1801   PetscFunctionBegin;
1802   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1803   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1804   if (size == 1) {
1805     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1806   } else {
1807     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1808   }
1809   PetscFunctionReturn(0);
1810 }
1811 EXTERN_C_END
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1815 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1816 {
1817   Mat            mat;
1818   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1819   PetscErrorCode ierr;
1820 
1821   PetscFunctionBegin;
1822   *newmat       = 0;
1823   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1824   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1825   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1826   a    = (Mat_MPIAIJ*)mat->data;
1827 
1828   mat->factor       = matin->factor;
1829   mat->bs           = matin->bs;
1830   mat->assembled    = PETSC_TRUE;
1831   mat->insertmode   = NOT_SET_VALUES;
1832   mat->preallocated = PETSC_TRUE;
1833 
1834   a->rstart       = oldmat->rstart;
1835   a->rend         = oldmat->rend;
1836   a->cstart       = oldmat->cstart;
1837   a->cend         = oldmat->cend;
1838   a->size         = oldmat->size;
1839   a->rank         = oldmat->rank;
1840   a->donotstash   = oldmat->donotstash;
1841   a->roworiented  = oldmat->roworiented;
1842   a->rowindices   = 0;
1843   a->rowvalues    = 0;
1844   a->getrowactive = PETSC_FALSE;
1845 
1846   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1847   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1848   if (oldmat->colmap) {
1849 #if defined (PETSC_USE_CTABLE)
1850     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1851 #else
1852     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1853     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1854     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1855 #endif
1856   } else a->colmap = 0;
1857   if (oldmat->garray) {
1858     PetscInt len;
1859     len  = oldmat->B->n;
1860     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1861     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1862     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1863   } else a->garray = 0;
1864 
1865   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1866   PetscLogObjectParent(mat,a->lvec);
1867   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1868   PetscLogObjectParent(mat,a->Mvctx);
1869   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1870   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1871   PetscLogObjectParent(mat,a->A);
1872   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1873   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1874   PetscLogObjectParent(mat,a->B);
1875   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1876   *newmat = mat;
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #include "petscsys.h"
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "MatLoad_MPIAIJ"
1884 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1885 {
1886   Mat            A;
1887   PetscScalar    *vals,*svals;
1888   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1889   MPI_Status     status;
1890   PetscErrorCode ierr;
1891   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1892   PetscInt       i,nz,j,rstart,rend;
1893   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1894   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1895   PetscInt       cend,cstart,n,*rowners;
1896   int            fd;
1897 
1898   PetscFunctionBegin;
1899   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1900   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1901   if (!rank) {
1902     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1903     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1904     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1905   }
1906 
1907   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1908   M = header[1]; N = header[2];
1909   /* determine ownership of all rows */
1910   m    = M/size + ((M % size) > rank);
1911   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1912   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
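  /* convert the gathered local row counts into a prefix sum: rowners[p] is the first global row owned by process p */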
1913   rowners[0] = 0;
1914   for (i=2; i<=size; i++) {
1915     rowners[i] += rowners[i-1];
1916   }
1917   rstart = rowners[rank];
1918   rend   = rowners[rank+1];
1919 
1920   /* distribute row lengths to all processors */
1921   ierr    = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
1922   if (!rank) {
1923     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1924     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1925     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1926     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1927     for (j=0; j<m; j++) {
1928       procsnz[0] += ourlens[j];
1929     }
1930     for (i=1; i<size; i++) {
1931       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1932       /* calculate the number of nonzeros on each processor */
1933       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1934         procsnz[i] += rowlengths[j];
1935       }
1936       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1937     }
1938     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1939   } else {
1940     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1941   }
1942 
1943   if (!rank) {
1944     /* determine max buffer needed and allocate it */
1945     maxnz = 0;
1946     for (i=0; i<size; i++) {
1947       maxnz = PetscMax(maxnz,procsnz[i]);
1948     }
1949     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1950 
1951     /* read in my part of the matrix column indices  */
1952     nz   = procsnz[0];
1953     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1954     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1955 
1956     /* read in everyone else's and ship them off */
1957     for (i=1; i<size; i++) {
1958       nz   = procsnz[i];
1959       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1960       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1961     }
1962     ierr = PetscFree(cols);CHKERRQ(ierr);
1963   } else {
1964     /* determine buffer space needed for message */
1965     nz = 0;
1966     for (i=0; i<m; i++) {
1967       nz += ourlens[i];
1968     }
1969     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1970 
1971     /* receive message of column indices */
1972     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1973     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1974     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1975   }
1976 
1977   /* determine column ownership if matrix is not square */
1978   if (N != M) {
1979     n      = N/size + ((N % size) > rank);
1980     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1981     cstart = cend - n;
1982   } else {
1983     cstart = rstart;
1984     cend   = rend;
1985     n      = cend - cstart;
1986   }
1987 
1988   /* loop over local rows, determining number of off diagonal entries */
1989   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1990   jj = 0;
1991   for (i=0; i<m; i++) {
1992     for (j=0; j<ourlens[i]; j++) {
1993       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1994       jj++;
1995     }
1996   }
1997 
1998   /* create our matrix */
1999   for (i=0; i<m; i++) {
2000     ourlens[i] -= offlens[i];
2001   }
2002   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2003   ierr = MatSetType(A,type);CHKERRQ(ierr);
2004   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2005 
2006   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2007   for (i=0; i<m; i++) {
2008     ourlens[i] += offlens[i];
2009   }
2010 
2011   if (!rank) {
2012     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2013 
2014     /* read in my part of the matrix numerical values  */
2015     nz   = procsnz[0];
2016     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2017 
2018     /* insert into matrix */
2019     jj      = rstart;
2020     smycols = mycols;
2021     svals   = vals;
2022     for (i=0; i<m; i++) {
2023       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2024       smycols += ourlens[i];
2025       svals   += ourlens[i];
2026       jj++;
2027     }
2028 
2029     /* read in other processors and ship out */
2030     for (i=1; i<size; i++) {
2031       nz   = procsnz[i];
2032       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2033       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2034     }
2035     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2036   } else {
2037     /* receive numeric values */
2038     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2039 
2040     /* receive message of values */
2041     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2042     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2043     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2044 
2045     /* insert into matrix */
2046     jj      = rstart;
2047     smycols = mycols;
2048     svals   = vals;
2049     for (i=0; i<m; i++) {
2050       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2051       smycols += ourlens[i];
2052       svals   += ourlens[i];
2053       jj++;
2054     }
2055   }
2056   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2057   ierr = PetscFree(vals);CHKERRQ(ierr);
2058   ierr = PetscFree(mycols);CHKERRQ(ierr);
2059   ierr = PetscFree(rowners);CHKERRQ(ierr);
2060 
2061   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2062   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2063   *newmat = A;
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 #undef __FUNCT__
2068 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2069 /*
2070     Not great since it makes two copies of the submatrix, first a SeqAIJ
2071   on each process and then the final result by concatenating the local matrices.
2072   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2073 */
2074 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2075 {
2076   PetscErrorCode ierr;
2077   PetscMPIInt    rank,size;
2078   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2079   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2080   Mat            *local,M,Mreuse;
2081   PetscScalar    *vwork,*aa;
2082   MPI_Comm       comm = mat->comm;
2083   Mat_SeqAIJ     *aij;
2084 
2085 
2086   PetscFunctionBegin;
2087   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2088   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2089 
2090   if (call ==  MAT_REUSE_MATRIX) {
2091     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2092     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2093     local = &Mreuse;
2094     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2095   } else {
2096     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2097     Mreuse = *local;
2098     ierr   = PetscFree(local);CHKERRQ(ierr);
2099   }
2100 
2101   /*
2102       m - number of local rows
2103       n - number of columns (same on all processors)
2104       rstart - first row in new global matrix generated
2105   */
2106   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2107   if (call == MAT_INITIAL_MATRIX) {
2108     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2109     ii  = aij->i;
2110     jj  = aij->j;
2111 
2112     /*
2113         Determine the number of non-zeros in the diagonal and off-diagonal
2114         portions of the matrix in order to do correct preallocation
2115     */
2116 
2117     /* first get start and end of "diagonal" columns */
2118     if (csize == PETSC_DECIDE) {
2119       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2120       if (mglobal == n) { /* square matrix */
2121         nlocal = m;
2122       } else {
2123         nlocal = n/size + ((n % size) > rank);
2124       }
2125     } else {
2126       nlocal = csize;
2127     }
2128     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2129     rstart = rend - nlocal;
2130     if (rank == size - 1 && rend != n) {
2131       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2132     }
2133 
2134     /* next, compute all the lengths */
2135     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2136     olens = dlens + m;
2137     for (i=0; i<m; i++) {
2138       jend = ii[i+1] - ii[i];
2139       olen = 0;
2140       dlen = 0;
2141       for (j=0; j<jend; j++) {
2142         if (*jj < rstart || *jj >= rend) olen++;
2143         else dlen++;
2144         jj++;
2145       }
2146       olens[i] = olen;
2147       dlens[i] = dlen;
2148     }
2149     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2150     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2151     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2152     ierr = PetscFree(dlens);CHKERRQ(ierr);
2153   } else {
2154     PetscInt ml,nl;
2155 
2156     M = *newmat;
2157     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2158     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2159     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2160     /*
2161          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2162        rather than the slower MatSetValues().
2163     */
2164     M->was_assembled = PETSC_TRUE;
2165     M->assembled     = PETSC_FALSE;
2166   }
2167   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2168   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2169   ii  = aij->i;
2170   jj  = aij->j;
2171   aa  = aij->a;
2172   for (i=0; i<m; i++) {
2173     row   = rstart + i;
2174     nz    = ii[i+1] - ii[i];
2175     cwork = jj;     jj += nz;
2176     vwork = aa;     aa += nz;
2177     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2178   }
2179 
2180   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2181   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2182   *newmat = M;
2183 
2184   /* save submatrix used in processor for next request */
2185   if (call ==  MAT_INITIAL_MATRIX) {
2186     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2187     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2188   }
2189 
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 EXTERN_C_BEGIN
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2196 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2197 {
2198   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2199   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2200   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2201   const PetscInt *JJ;
2202   PetscScalar    *values;
2203   PetscErrorCode ierr;
2204 
2205   PetscFunctionBegin;
2206 #if defined(PETSC_OPT_g)
2207   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2208 #endif
2209   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2210   o_nnz = d_nnz + m;
2211 
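  /* for each local row count the columns that fall inside [cstart,cend) (diagonal block) versus
     outside it (off-diagonal block); this relies on the column indices being sorted in each row */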
2212   for (i=0; i<m; i++) {
2213     nnz     = I[i+1]- I[i];
2214     JJ      = J + I[i];
2215     nnz_max = PetscMax(nnz_max,nnz);
2216 #if defined(PETSC_OPT_g)
2217     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2218 #endif
2219     for (j=0; j<nnz; j++) {
2220       if (*JJ >= cstart) break;
2221       JJ++;
2222     }
2223     d = 0;
2224     for (; j<nnz; j++) {
2225       if (*JJ++ >= cend) break;
2226       d++;
2227     }
2228     d_nnz[i] = d;
2229     o_nnz[i] = nnz - d;
2230   }
2231   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2232   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2233 
2234   if (v) values = (PetscScalar*)v;
2235   else {
2236     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2237     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2238   }
2239 
2240   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2241   for (i=0; i<m; i++) {
2242     ii   = i + rstart;
2243     nnz  = I[i+1]- I[i];
2244     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2245   }
2246   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2247   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2248   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2249 
2250   if (!v) {
2251     ierr = PetscFree(values);CHKERRQ(ierr);
2252   }
2253   PetscFunctionReturn(0);
2254 }
2255 EXTERN_C_END
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2259 /*@C
2260    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2261    (the default parallel PETSc format).
2262 
2263    Collective on MPI_Comm
2264 
2265    Input Parameters:
2266 +  A - the matrix
2267 .  i - the indices into j for the start of each local row (starts with zero)
2268 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2269 -  v - optional values in the matrix
2270 
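   Example usage (a sketch only; the communicator comm, the sizes m,n,M,N, and the local CSR
   arrays ia, ja, va are assumed to have been set up by the caller):
.vb
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(A,ia,ja,va);CHKERRQ(ierr);
.ve
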
2271    Level: developer
2272 
2273 .keywords: matrix, aij, compressed row, sparse, parallel
2274 
2275 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2276 @*/
2277 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2278 {
2279   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2280 
2281   PetscFunctionBegin;
2282   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2283   if (f) {
2284     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2285   }
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #undef __FUNCT__
2290 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2291 /*@C
2292    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2293    (the default parallel PETSc format).  For good matrix assembly performance
2294    the user should preallocate the matrix storage by setting the parameters
2295    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2296    performance can be increased by more than a factor of 50.
2297 
2298    Collective on MPI_Comm
2299 
2300    Input Parameters:
2301 +  A - the matrix
2302 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2303            (same value is used for all local rows)
2304 .  d_nnz - array containing the number of nonzeros in the various rows of the
2305            DIAGONAL portion of the local submatrix (possibly different for each row)
2306            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2307            The size of this array is equal to the number of local rows, i.e. 'm'.
2308            You must leave room for the diagonal entry even if it is zero.
2309 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2310            submatrix (same value is used for all local rows).
2311 -  o_nnz - array containing the number of nonzeros in the various rows of the
2312            OFF-DIAGONAL portion of the local submatrix (possibly different for
2313            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2314            structure. The size of this array is equal to the number
2315            of local rows, i.e. 'm'.
2316 
2317    If the *_nnz parameter is given then the *_nz parameter is ignored
2318 
2319    The AIJ format (also called the Yale sparse matrix format or
2320    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2321    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2322 
2323    The parallel matrix is partitioned such that the first m0 rows belong to
2324    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2325    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2326 
2327    The DIAGONAL portion of the local submatrix of a processor can be defined
2328    as the submatrix which is obtained by extracting the part corresponding
2329    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2330    first row that belongs to the processor, and r2 is the last row belonging
2331    to this processor. This is a square mxm matrix. The remaining portion
2332    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2333 
2334    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2335 
2336    Example usage:
2337 
2338    Consider the following 8x8 matrix with 34 non-zero values, that is
2339    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2340    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2341    as follows:
2342 
2343 .vb
2344             1  2  0  |  0  3  0  |  0  4
2345     Proc0   0  5  6  |  7  0  0  |  8  0
2346             9  0 10  | 11  0  0  | 12  0
2347     -------------------------------------
2348            13  0 14  | 15 16 17  |  0  0
2349     Proc1   0 18  0  | 19 20 21  |  0  0
2350             0  0  0  | 22 23  0  | 24  0
2351     -------------------------------------
2352     Proc2  25 26 27  |  0  0 28  | 29  0
2353            30  0  0  | 31 32 33  |  0 34
2354 .ve
2355 
2356    This can be represented as a collection of submatrices as:
2357 
2358 .vb
2359       A B C
2360       D E F
2361       G H I
2362 .ve
2363 
2364    Where the submatrices A,B,C are owned by proc0, D,E,F are
2365    owned by proc1, G,H,I are owned by proc2.
2366 
2367    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2368    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2369    The 'M','N' parameters are 8,8, and have the same values on all procs.
2370 
2371    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2372    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2373    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2374    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2375    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2376    matrix, and [DF] as another SeqAIJ matrix.
2377 
2378    When d_nz, o_nz parameters are specified, d_nz storage elements are
2379    allocated for every row of the local diagonal submatrix, and o_nz
2380    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2381    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
2382    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2383    In this case, the values of d_nz,o_nz are:
2384 .vb
2385      proc0 : dnz = 2, o_nz = 2
2386      proc1 : dnz = 3, o_nz = 2
2387      proc2 : dnz = 1, o_nz = 4
2388 .ve
2389    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2390    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2391    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2392    34 values.
2393 
2394    When d_nnz, o_nnz parameters are specified, the storage is specified
2395    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2396    In the above case the values for d_nnz,o_nnz are:
2397 .vb
2398      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2399      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2400      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2401 .ve
2402    Here the space allocated is the sum of all the above values, i.e. 34, and
2403    hence pre-allocation is perfect.
2404 
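   A sketch of the corresponding calls for the example above (comm, m, n, M, N are assumed to be
   provided by the caller; d_nnz and o_nnz hold the per-row counts listed above):
.vb
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
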
2405    Level: intermediate
2406 
2407 .keywords: matrix, aij, compressed row, sparse, parallel
2408 
2409 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2410           MPIAIJ
2411 @*/
2412 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2413 {
2414   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2415 
2416   PetscFunctionBegin;
2417   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2418   if (f) {
2419     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2420   }
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatCreateMPIAIJ"
2426 /*@C
2427    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2428    (the default parallel PETSc format).  For good matrix assembly performance
2429    the user should preallocate the matrix storage by setting the parameters
2430    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2431    performance can be increased by more than a factor of 50.
2432 
2433    Collective on MPI_Comm
2434 
2435    Input Parameters:
2436 +  comm - MPI communicator
2437 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2438            This value should be the same as the local size used in creating the
2439            y vector for the matrix-vector product y = Ax.
2440 .  n - This value should be the same as the local size used in creating the
2441        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2442        calculated if N is given). For square matrices n is almost always m.
2443 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2444 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2445 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2446            (same value is used for all local rows)
2447 .  d_nnz - array containing the number of nonzeros in the various rows of the
2448            DIAGONAL portion of the local submatrix (possibly different for each row)
2449            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2450            The size of this array is equal to the number of local rows, i.e. 'm'.
2451            You must leave room for the diagonal entry even if it is zero.
2452 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2453            submatrix (same value is used for all local rows).
2454 -  o_nnz - array containing the number of nonzeros in the various rows of the
2455            OFF-DIAGONAL portion of the local submatrix (possibly different for
2456            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2457            structure. The size of this array is equal to the number
2458            of local rows, i.e. 'm'.
2459 
2460    Output Parameter:
2461 .  A - the matrix
2462 
2463    Notes:
2464    If the *_nnz parameter is given then the *_nz parameter is ignored
2465 
2466    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2467    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2468    storage requirements for this matrix.
2469 
2470    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2471    processor then it must be used on all processors that share the object for
2472    that argument.
2473 
2474    The user MUST specify either the local or global matrix dimensions
2475    (possibly both).
2476 
2477    The parallel matrix is partitioned such that the first m0 rows belong to
2478    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2479    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2480 
2481    The DIAGONAL portion of the local submatrix of a processor can be defined
2482    as the submatrix which is obtained by extracting the part corresponding
2483    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2484    first row that belongs to the processor, and r2 is the last row belonging
2485    to this processor. This is a square mxm matrix. The remaining portion
2486    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2487 
2488    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2489 
2490    When calling this routine with a single process communicator, a matrix of
2491    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2492    type of communicator, use the construction mechanism:
2493      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2494 
2495    By default, this format uses inodes (identical nodes) when possible.
2496    We search for consecutive rows with the same nonzero structure, thereby
2497    reusing matrix information to achieve increased efficiency.
2498 
2499    Options Database Keys:
2500 +  -mat_aij_no_inode  - Do not use inodes
2501 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2502 -  -mat_aij_oneindex - Internally use indexing starting at 1
2503         rather than 0.  Note that when calling MatSetValues(),
2504         the user still MUST index entries starting at 0!
2505 
2506 
2507    Example usage:
2508 
2509    Consider the following 8x8 matrix with 34 non-zero values, that is
2510    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2511    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2512    as follows:
2513 
2514 .vb
2515             1  2  0  |  0  3  0  |  0  4
2516     Proc0   0  5  6  |  7  0  0  |  8  0
2517             9  0 10  | 11  0  0  | 12  0
2518     -------------------------------------
2519            13  0 14  | 15 16 17  |  0  0
2520     Proc1   0 18  0  | 19 20 21  |  0  0
2521             0  0  0  | 22 23  0  | 24  0
2522     -------------------------------------
2523     Proc2  25 26 27  |  0  0 28  | 29  0
2524            30  0  0  | 31 32 33  |  0 34
2525 .ve
2526 
2527    This can be represented as a collection of submatrices as:
2528 
2529 .vb
2530       A B C
2531       D E F
2532       G H I
2533 .ve
2534 
2535    Where the submatrices A,B,C are owned by proc0, D,E,F are
2536    owned by proc1, G,H,I are owned by proc2.
2537 
2538    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2539    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2540    The 'M','N' parameters are 8,8, and have the same values on all procs.
2541 
2542    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2543    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2544    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2545    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2546    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2547    matrix, and [DF] as another SeqAIJ matrix.
2548 
2549    When d_nz, o_nz parameters are specified, d_nz storage elements are
2550    allocated for every row of the local diagonal submatrix, and o_nz
2551    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2552    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
2553    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2554    In this case, the values of d_nz,o_nz are:
2555 .vb
2556      proc0 : dnz = 2, o_nz = 2
2557      proc1 : dnz = 3, o_nz = 2
2558      proc2 : dnz = 1, o_nz = 4
2559 .ve
2560    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2561    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2562    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2563    34 values.
2564 
2565    When d_nnz, o_nnz parameters are specified, the storage is specified
2566    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2567    In the above case the values for d_nnz,o_nnz are:
2568 .vb
2569      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2570      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2571      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2572 .ve
2573    Here the space allocated is the sum of all the above values, i.e. 34, and
2574    hence pre-allocation is perfect.
2575 
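   A sketch of the corresponding single call using the d_nz/o_nz form (comm is assumed to be
   provided by the caller; m, d_nz, o_nz take the per-process values listed above):
.vb
     ierr = MatCreateMPIAIJ(comm,m,PETSC_DECIDE,8,8,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
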
2576    Level: intermediate
2577 
2578 .keywords: matrix, aij, compressed row, sparse, parallel
2579 
2580 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2581           MPIAIJ
2582 @*/
2583 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2584 {
2585   PetscErrorCode ierr;
2586   PetscMPIInt    size;
2587 
2588   PetscFunctionBegin;
2589   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2590   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2591   if (size > 1) {
2592     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2593     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2594   } else {
2595     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2596     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2597   }
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 #undef __FUNCT__
2602 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2603 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2604 {
2605   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2606 
2607   PetscFunctionBegin;
2608   *Ad     = a->A;
2609   *Ao     = a->B;
2610   *colmap = a->garray;
2611   PetscFunctionReturn(0);
2612 }
2613 
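/*
   A brief usage sketch for MatMPIAIJGetSeqAIJ() (A is assumed to be an assembled MPIAIJ matrix
   created elsewhere).  Ad is the local diagonal block, Ao the local off-diagonal block, and
   colmap[j] is the global column number of local column j of Ao:

     Mat      Ad,Ao;
     PetscInt *colmap;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/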
2614 #undef __FUNCT__
2615 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2616 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2617 {
2618   PetscErrorCode ierr;
2619   PetscInt       i;
2620   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2621 
2622   PetscFunctionBegin;
2623   if (coloring->ctype == IS_COLORING_LOCAL) {
2624     ISColoringValue *allcolors,*colors;
2625     ISColoring      ocoloring;
2626 
2627     /* set coloring for diagonal portion */
2628     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2629 
2630     /* set coloring for off-diagonal portion */
2631     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2632     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2633     for (i=0; i<a->B->n; i++) {
2634       colors[i] = allcolors[a->garray[i]];
2635     }
2636     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2637     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2638     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2639     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2640   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2641     ISColoringValue *colors;
2642     PetscInt        *larray;
2643     ISColoring      ocoloring;
2644 
2645     /* set coloring for diagonal portion */
2646     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2647     for (i=0; i<a->A->n; i++) {
2648       larray[i] = i + a->cstart;
2649     }
2650     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2651     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2652     for (i=0; i<a->A->n; i++) {
2653       colors[i] = coloring->colors[larray[i]];
2654     }
2655     ierr = PetscFree(larray);CHKERRQ(ierr);
2656     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2657     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2658     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2659 
2660     /* set coloring for off-diagonal portion */
2661     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2662     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2663     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2664     for (i=0; i<a->B->n; i++) {
2665       colors[i] = coloring->colors[larray[i]];
2666     }
2667     ierr = PetscFree(larray);CHKERRQ(ierr);
2668     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2669     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2670     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2671   } else {
2672     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2673   }
2674 
2675   PetscFunctionReturn(0);
2676 }
2677 
2678 #undef __FUNCT__
2679 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2680 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2681 {
2682   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2683   PetscErrorCode ierr;
2684 
2685   PetscFunctionBegin;
2686   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2687   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2688   PetscFunctionReturn(0);
2689 }
2690 
2691 #undef __FUNCT__
2692 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2693 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2694 {
2695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2696   PetscErrorCode ierr;
2697 
2698   PetscFunctionBegin;
2699   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2700   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2701   PetscFunctionReturn(0);
2702 }
2703 
2704 #undef __FUNCT__
2705 #define __FUNCT__ "MatMerge"
2706 /*@C
2707       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2708                  matrices from each processor
2709 
2710     Collective on MPI_Comm
2711 
2712    Input Parameters:
2713 +    comm - the communicator the parallel matrix will live on
2714 .    inmat - the input sequential matrix (one per process)
2715 .    n - number of local columns (or PETSC_DECIDE)
2716 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2717 
2718    Output Parameter:
2719 .    outmat - the parallel matrix generated
2720 
2721     Level: advanced
2722 
2723    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2724 
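   Example usage (a sketch only; Aseq is assumed to be an assembled sequential matrix on each
   process; note that the implementation below destroys the input matrix once it has been merged):
.vb
     ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
.ve
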
2725 @*/
2726 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2727 {
2728   PetscErrorCode ierr;
2729   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2730   PetscInt       *indx;
2731   PetscScalar    *values;
2732   PetscMap       columnmap,rowmap;
2733 
2734   PetscFunctionBegin;
2735   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2736   /*
2737   PetscMPIInt       rank;
2738   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2739   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2740   */
2741   if (scall == MAT_INITIAL_MATRIX){
2742     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2743     if (n == PETSC_DECIDE){
2744       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2745       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2746       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2747       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2748       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2749     }
2750 
2751     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2752     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2753     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2754     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2755     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2756 
2757     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2758     for (i=0;i<m;i++) {
2759       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2760       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2761       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2762     }
2763     /* This routine will ONLY return MPIAIJ type matrix */
2764     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2765     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2766     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2767     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2768 
2769   } else if (scall == MAT_REUSE_MATRIX){
2770     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2771   } else {
2772     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2773   }
2774 
2775   for (i=0;i<m;i++) {
2776     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2777     I    = i + rstart;
2778     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2779     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2780   }
2781   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2782   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2783   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2784 
2785   PetscFunctionReturn(0);
2786 }
2787 
2788 #undef __FUNCT__
2789 #define __FUNCT__ "MatFileSplit"
2790 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2791 {
2792   PetscErrorCode    ierr;
2793   PetscMPIInt       rank;
2794   PetscInt          m,N,i,rstart,nnz;
2795   size_t            len;
2796   const PetscInt    *indx;
2797   PetscViewer       out;
2798   char              *name;
2799   Mat               B;
2800   const PetscScalar *values;
2801 
2802   PetscFunctionBegin;
2803   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2804   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2805   /* Should this be the type of the diagonal block of A? */
2806   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2807   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2808   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2809   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2810   for (i=0;i<m;i++) {
2811     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2812     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2813     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2814   }
2815   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2816   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2817 
2818   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2819   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2820   ierr = PetscMalloc((len+12)*sizeof(char),&name);CHKERRQ(ierr); /* leave room for ".<rank>" even with many processes */
2821   sprintf(name,"%s.%d",outfile,rank);
2822   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2823   ierr = PetscFree(name);CHKERRQ(ierr);
2824   ierr = MatView(B,out);CHKERRQ(ierr);
2825   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2826   ierr = MatDestroy(B);CHKERRQ(ierr);
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2831 #undef __FUNCT__
2832 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2833 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2834 {
2835   PetscErrorCode       ierr;
2836   Mat_Merge_SeqsToMPI  *merge;
2837   PetscObjectContainer container;
2838 
2839   PetscFunctionBegin;
2840   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2841   if (container) {
2842     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2843     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2844     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2845     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2846     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2847     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2848     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2849     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2850     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2851     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2852     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2853     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2854 
2855     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2856     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2857     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is valid only when the container was found */
2858   }
2859 
2860   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2861   PetscFunctionReturn(0);
2862 }
2863 
2864 #include "src/mat/utils/freespace.h"
2865 #include "petscbt.h"
2866 #undef __FUNCT__
2867 #define __FUNCT__ "MatMerge_SeqsToMPI"
2868 /*@C
2869       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2870                  matrices from each processor
2871 
2872     Collective on MPI_Comm
2873 
2874    Input Parameters:
2875 +    comm - the communicator the parallel matrix will live on
2876 .    seqmat - the input sequential matrix (one per processor)
2877 .    m - number of local rows (or PETSC_DECIDE)
2878 .    n - number of local columns (or PETSC_DECIDE)
2879 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2880 
2881    Output Parameter:
2882 .    mpimat - the parallel matrix generated
2883 
2884     Level: advanced
2885 
2886    Notes:
2887      The dimensions of the sequential matrix on each processor MUST be the same.
2888      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
2889      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2890 @*/
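/*
   A minimal usage sketch (hypothetical names); the MAT_REUSE_MATRIX call assumes only the
   numerical values, not the nonzero pattern, of seqmat have changed:

      Mat seqmat,mpimat;
      ...  assemble the local SeqAIJ matrix seqmat on every process ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ...  update the values of seqmat ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
*/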
2891 static PetscEvent logkey_seqstompinum = 0;
2892 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2893 {
2894   PetscErrorCode       ierr;
2895   MPI_Comm             comm=mpimat->comm;
2896   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2897   PetscMPIInt          size,rank,taga,*len_s;
2898   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2899   PetscInt             proc,m;
2900   PetscInt             **buf_ri,**buf_rj;
2901   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2902   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2903   MPI_Request          *s_waits,*r_waits;
2904   MPI_Status           *status;
2905   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2906   Mat_Merge_SeqsToMPI  *merge;
2907   PetscObjectContainer container;
2908 
2909   PetscFunctionBegin;
2910   if (!logkey_seqstompinum) {
2911     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2912   }
2913   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2914 
2915   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2916   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2917 
2918   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2919   if (container) {
2920     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2921   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix was not created by MatMerge_SeqsToMPISymbolic()");
2922   bi     = merge->bi;
2923   bj     = merge->bj;
2924   buf_ri = merge->buf_ri;
2925   buf_rj = merge->buf_rj;
2926 
2927   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2928   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2929   len_s  = merge->len_s;
2930 
2931   /* send and recv matrix values */
2932   /*-----------------------------*/
2933   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2934   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2935 
2936   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2937   for (proc=0,k=0; proc<size; proc++){
2938     if (!len_s[proc]) continue;
2939     i = owners[proc];
2940     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2941     k++;
2942   }
2943 
2944   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2945   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2946   ierr = PetscFree(status);CHKERRQ(ierr);
2947 
2948   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2949   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2950 
2951   /* insert mat values of mpimat */
2952   /*----------------------------*/
2953   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2954   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2955   nextrow = buf_ri_k + merge->nrecv;
2956   nextai  = nextrow + merge->nrecv;
2957 
2958   for (k=0; k<merge->nrecv; k++){
2959     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2960     nrows = *(buf_ri_k[k]);
2961     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2962     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
2963   }
2964 
2965   /* set values of ba */
2966   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2967   for (i=0; i<m; i++) {
2968     arow = owners[rank] + i;
2969     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2970     bnzi = bi[i+1] - bi[i];
2971     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2972 
2973     /* add local non-zero vals of this proc's seqmat into ba */
2974     anzi = ai[arow+1] - ai[arow];
2975     aj   = a->j + ai[arow];
2976     aa   = a->a + ai[arow];
2977     nextaj = 0;
2978     for (j=0; nextaj<anzi; j++){
2979       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2980         ba_i[j] += aa[nextaj++];
2981       }
2982     }
2983 
2984     /* add received vals into ba */
2985     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2986       /* i-th row */
2987       if (i == *nextrow[k]) {
2988         anzi = *(nextai[k]+1) - *nextai[k];
2989         aj   = buf_rj[k] + *(nextai[k]);
2990         aa   = abuf_r[k] + *(nextai[k]);
2991         nextaj = 0;
2992         for (j=0; nextaj<anzi; j++){
2993           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2994             ba_i[j] += aa[nextaj++];
2995           }
2996         }
2997         nextrow[k]++; nextai[k]++;
2998       }
2999     }
3000     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3001   }
3002   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3003   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3004 
3005   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3006   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3007   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3008   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3009   PetscFunctionReturn(0);
3010 }
3011 static PetscEvent logkey_seqstompisym = 0;
3012 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3013 {
3014   PetscErrorCode       ierr;
3015   Mat                  B_mpi;
3016   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3017   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3018   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3019   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3020   PetscInt             len,proc,*dnz,*onz;
3021   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3022   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3023   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3024   MPI_Status           *status;
3025   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3026   PetscBT              lnkbt;
3027   Mat_Merge_SeqsToMPI  *merge;
3028   PetscObjectContainer container;
3029 
3030   PetscFunctionBegin;
3031   if (!logkey_seqstompisym) {
3032     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3033   }
3034   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3035 
3036   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3037   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3038 
3039   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3040   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3041 
3042   /* determine row ownership */
3043   /*---------------------------------------------------------*/
3044   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3045   if (m == PETSC_DECIDE) {
3046     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3047   } else {
3048     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3049   }
3050   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3051   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3052   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3053 
3054   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3055   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3056 
3057   /* determine the number of messages to send, their lengths */
3058   /*---------------------------------------------------------*/
3059   len_s  = merge->len_s;
3060 
3061   len = 0;  /* length of buf_si[] */
3062   merge->nsend = 0;
3063   for (proc=0; proc<size; proc++){
3064     len_si[proc] = 0;
3065     if (proc == rank){
3066       len_s[proc] = 0;
3067     } else {
3068       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3069       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
3070     }
3071     if (len_s[proc]) {
3072       merge->nsend++;
3073       nrows = 0;
3074       for (i=owners[proc]; i<owners[proc+1]; i++){
3075         if (ai[i+1] > ai[i]) nrows++;
3076       }
3077       len_si[proc] = 2*(nrows+1);
3078       len += len_si[proc];
3079     }
3080   }
3081 
3082   /* determine the number and length of messages to receive for ij-structure */
3083   /*-------------------------------------------------------------------------*/
3084   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3085   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3086 
3087   /* post the Irecv of j-structure */
3088   /*-------------------------------*/
3089   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3090   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3091 
3092   /* post the Isend of j-structure */
3093   /*--------------------------------*/
3094   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3095   sj_waits = si_waits + merge->nsend;
3096 
3097   for (proc=0, k=0; proc<size; proc++){
3098     if (!len_s[proc]) continue;
3099     i = owners[proc];
3100     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3101     k++;
3102   }
3103 
3104   /* receives and sends of j-structure are complete */
3105   /*------------------------------------------------*/
3106   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3107   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3108 
3109   /* send and recv i-structure */
3110   /*---------------------------*/
3111   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3112   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3113 
3114   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3115   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3116   for (proc=0,k=0; proc<size; proc++){
3117     if (!len_s[proc]) continue;
3118     /* form outgoing message for i-structure:
3119          buf_si[0]:                 nrows to be sent
3120                [1:nrows]:           row index (local to the receiving process)
3121                [nrows+1:2*nrows+1]: i-structure index
3122     */
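    /* For example, if two rows with local indices 3 and 7 and with 2 and 5 nonzeros are sent
       to [proc], then buf_si = {2, 3, 7, 0, 2, 7} (illustrative values only) */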
3123     /*-------------------------------------------*/
3124     nrows = len_si[proc]/2 - 1;
3125     buf_si_i    = buf_si + nrows+1;
3126     buf_si[0]   = nrows;
3127     buf_si_i[0] = 0;
3128     nrows = 0;
3129     for (i=owners[proc]; i<owners[proc+1]; i++){
3130       anzi = ai[i+1] - ai[i];
3131       if (anzi) {
3132         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3133         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3134         nrows++;
3135       }
3136     }
3137     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3138     k++;
3139     buf_si += len_si[proc];
3140   }
3141 
3142   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3143   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3144 
3145   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3146   for (i=0; i<merge->nrecv; i++){
3147     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3148   }
3149 
3150   ierr = PetscFree(len_si);CHKERRQ(ierr);
3151   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3152   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3153   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3154   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3155   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3156   ierr = PetscFree(status);CHKERRQ(ierr);
3157 
3158   /* compute a local seq matrix in each processor */
3159   /*----------------------------------------------*/
3160   /* allocate bi array and free space for accumulating nonzero column info */
3161   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3162   bi[0] = 0;
3163 
3164   /* create and initialize a linked list */
3165   nlnk = N+1;
3166   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3167 
3168   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3169   len = 0;
3170   len  = ai[owners[rank+1]] - ai[owners[rank]];
3171   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3172   current_space = free_space;
3173 
3174   /* determine symbolic info for each local row */
3175   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3176   nextrow = buf_ri_k + merge->nrecv;
3177   nextai  = nextrow + merge->nrecv;
3178   for (k=0; k<merge->nrecv; k++){
3179     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3180     nrows = *buf_ri_k[k];
3181     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3182     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3183   }
3184 
3185   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3186   len = 0;
3187   for (i=0;i<m;i++) {
3188     bnzi   = 0;
3189     /* add local non-zero cols of this proc's seqmat into lnk */
3190     arow   = owners[rank] + i;
3191     anzi   = ai[arow+1] - ai[arow];
3192     aj     = a->j + ai[arow];
3193     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3194     bnzi += nlnk;
3195     /* add received col data into lnk */
3196     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3197       if (i == *nextrow[k]) { /* i-th row */
3198         anzi = *(nextai[k]+1) - *nextai[k];
3199         aj   = buf_rj[k] + *nextai[k];
3200         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3201         bnzi += nlnk;
3202         nextrow[k]++; nextai[k]++;
3203       }
3204     }
3205     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3206 
3207     /* if free space is not available, make more free space */
3208     if (current_space->local_remaining<bnzi) {
3209       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3210       nspacedouble++;
3211     }
3212     /* copy data into free space, then initialize lnk */
3213     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3214     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3215 
3216     current_space->array           += bnzi;
3217     current_space->local_used      += bnzi;
3218     current_space->local_remaining -= bnzi;
3219 
3220     bi[i+1] = bi[i] + bnzi;
3221   }
3222 
3223   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3224 
3225   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3226   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3227   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3228 
3229   /* create symbolic parallel matrix B_mpi */
3230   /*---------------------------------------*/
3231   if (n==PETSC_DECIDE) {
3232     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3233   } else {
3234     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3235   }
3236   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3237   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3238   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3239 
3240   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3241   B_mpi->assembled     = PETSC_FALSE;
3242   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3243   merge->bi            = bi;
3244   merge->bj            = bj;
3245   merge->buf_ri        = buf_ri;
3246   merge->buf_rj        = buf_rj;
3247   merge->coi           = PETSC_NULL;
3248   merge->coj           = PETSC_NULL;
3249   merge->owners_co     = PETSC_NULL;
3250 
3251   /* attach the supporting struct to B_mpi for reuse */
3252   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3253   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3254   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3255   *mpimat = B_mpi;
3256   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3257   PetscFunctionReturn(0);
3258 }
3259 
3260 static PetscEvent logkey_seqstompi = 0;
3261 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3262 {
3263   PetscErrorCode   ierr;
3264 
3265   PetscFunctionBegin;
3266   if (!logkey_seqstompi) {
3267     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3268   }
3269   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3270   if (scall == MAT_INITIAL_MATRIX){
3271     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3272   }
3273   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3274   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3275   PetscFunctionReturn(0);
3276 }
3277 static PetscEvent logkey_getlocalmat = 0;
3278 #undef __FUNCT__
3279 #define __FUNCT__ "MatGetLocalMat"
3280 /*@C
3281      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel (MPIAIJ) matrix
3282 
3283     Not Collective
3284 
3285    Input Parameters:
3286 +    A - the matrix
3287 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3288 
3289    Output Parameter:
3290 .    A_loc - the local sequential matrix generated
3291 
3292     Level: developer
3293 
3294 @*/
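/*
   A minimal usage sketch (A is an assembled MPIAIJ matrix; names are illustrative):

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ...  use the SeqAIJ matrix A_loc ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);  /* refresh the values only */
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/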
3295 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3296 {
3297   PetscErrorCode  ierr;
3298   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3299   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3300   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3301   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3302   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3303   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3304 
3305   PetscFunctionBegin;
3306   if (!logkey_getlocalmat) {
3307     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3308   }
3309   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3310   if (scall == MAT_INITIAL_MATRIX){
3311     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3312     ci[0] = 0;
3313     for (i=0; i<am; i++){
3314       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3315     }
3316     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3317     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3318     k = 0;
3319     for (i=0; i<am; i++) {
3320       ncols_o = bi[i+1] - bi[i];
3321       ncols_d = ai[i+1] - ai[i];
3322       /* off-diagonal portion of A */
3323       for (jo=0; jo<ncols_o; jo++) {
3324         col = cmap[*bj];
3325         if (col >= cstart) break;
3326         cj[k]   = col; bj++;
3327         ca[k++] = *ba++;
3328       }
3329       /* diagonal portion of A */
3330       for (j=0; j<ncols_d; j++) {
3331         cj[k]   = cstart + *aj++;
3332         ca[k++] = *aa++;
3333       }
3334       /* off-diagonal portion of A */
3335       for (j=jo; j<ncols_o; j++) {
3336         cj[k]   = cmap[*bj++];
3337         ca[k++] = *ba++;
3338       }
3339     }
3340     /* put together the new matrix */
3341     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3342     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3343     /* Since these are PETSc arrays, change flags to free them as necessary. */
3344     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3345     mat->freedata = PETSC_TRUE;
3346     mat->nonew    = 0;
3347   } else if (scall == MAT_REUSE_MATRIX){
3348     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3349     ci = mat->i; cj = mat->j; ca = mat->a;
3350     for (i=0; i<am; i++) {
3351       /* off-diagonal portion of A */
3352       ncols_o = bi[i+1] - bi[i];
3353       for (jo=0; jo<ncols_o; jo++) {
3354         col = cmap[*bj];
3355         if (col >= cstart) break;
3356         *ca++ = *ba++; bj++;
3357       }
3358       /* diagonal portion of A */
3359       ncols_d = ai[i+1] - ai[i];
3360       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3361       /* off-diagonal portion of A */
3362       for (j=jo; j<ncols_o; j++) {
3363         *ca++ = *ba++; bj++;
3364       }
3365     }
3366   } else {
3367     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3368   }
3369 
3370   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3371   PetscFunctionReturn(0);
3372 }
3373 
3374 static PetscEvent logkey_getlocalmatcondensed = 0;
3375 #undef __FUNCT__
3376 #define __FUNCT__ "MatGetLocalMatCondensed"
3377 /*@C
3378      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and the NON-ZERO columns of a parallel (MPIAIJ) matrix
3379 
3380     Not Collective
3381 
3382    Input Parameters:
3383 +    A - the matrix
3384 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3385 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3386 
3387    Output Parameter:
3388 .    A_loc - the local sequential matrix generated
3389 
3390     Level: developer
3391 
3392 @*/
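/*
   A minimal usage sketch (illustrative names); passing PETSC_NULL for row and col lets the
   routine use all local rows and the nonzero columns of A:

      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ...
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/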
3393 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3394 {
3395   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3396   PetscErrorCode    ierr;
3397   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3398   IS                isrowa,iscola;
3399   Mat               *aloc;
3400 
3401   PetscFunctionBegin;
3402   if (!logkey_getlocalmatcondensed) {
3403     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3404   }
3405   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3406   if (!row){
3407     start = a->rstart; end = a->rend;
3408     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3409   } else {
3410     isrowa = *row;
3411   }
3412   if (!col){
3413     start = a->cstart;
3414     cmap  = a->garray;
3415     nzA   = a->A->n;
3416     nzB   = a->B->n;
3417     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3418     ncols = 0;
3419     for (i=0; i<nzB; i++) {
3420       if (cmap[i] < start) idx[ncols++] = cmap[i];
3421       else break;
3422     }
3423     imark = i;
3424     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3425     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3426     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3427     ierr = PetscFree(idx);CHKERRQ(ierr);
3428   } else {
3429     iscola = *col;
3430   }
3431   if (scall != MAT_INITIAL_MATRIX){
3432     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3433     aloc[0] = *A_loc;
3434   }
3435   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3436   *A_loc = aloc[0];
3437   ierr = PetscFree(aloc);CHKERRQ(ierr);
3438   if (!row){
3439     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3440   }
3441   if (!col){
3442     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3443   }
3444   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3445   PetscFunctionReturn(0);
3446 }
3447 
3448 static PetscEvent logkey_GetBrowsOfAcols = 0;
3449 #undef __FUNCT__
3450 #define __FUNCT__ "MatGetBrowsOfAcols"
3451 /*@C
3452     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of local A
3453 
3454     Collective on Mat
3455 
3456    Input Parameters:
3457 +    A,B - the matrices in mpiaij format
3458 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3459 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3460 
3461    Output Parameter:
3462 +    rowb, colb - index sets of rows and columns of B to extract
3463 .    brstart - index of the first row of B_seq that is taken from B's own local rows (the next B->m rows)
3464 -    B_seq - the sequential matrix generated
3465 
3466     Level: developer
3467 
3468 @*/
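/*
   A minimal usage sketch (illustrative names); rowb and colb are created on the first call and
   must be passed back, and eventually destroyed by the caller, when the matrix is reused:

      IS       rowb,colb;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/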
3469 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3470 {
3471   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3472   PetscErrorCode    ierr;
3473   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3474   IS                isrowb,iscolb;
3475   Mat               *bseq;
3476 
3477   PetscFunctionBegin;
3478   if (a->cstart != b->rstart || a->cend != b->rend){
3479     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3480   }
3481   if (!logkey_GetBrowsOfAcols) {
3482     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3483   }
3484   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3485 
3486   if (scall == MAT_INITIAL_MATRIX){
3487     start = a->cstart;
3488     cmap  = a->garray;
3489     nzA   = a->A->n;
3490     nzB   = a->B->n;
3491     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3492     ncols = 0;
3493     for (i=0; i<nzB; i++) {  /* row < local row index */
3494       if (cmap[i] < start) idx[ncols++] = cmap[i];
3495       else break;
3496     }
3497     imark = i;
3498     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3499     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3500     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3501     ierr = PetscFree(idx);CHKERRQ(ierr);
3502     *brstart = imark;
3503     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3504   } else {
3505     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3506     isrowb = *rowb; iscolb = *colb;
3507     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3508     bseq[0] = *B_seq;
3509   }
3510   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3511   *B_seq = bseq[0];
3512   ierr = PetscFree(bseq);CHKERRQ(ierr);
3513   if (!rowb){
3514     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3515   } else {
3516     *rowb = isrowb;
3517   }
3518   if (!colb){
3519     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3520   } else {
3521     *colb = iscolb;
3522   }
3523   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3524   PetscFunctionReturn(0);
3525 }
3526 
3527 static PetscEvent logkey_GetBrowsOfAocols = 0;
3528 #undef __FUNCT__
3529 #define __FUNCT__ "MatGetBrowsOfAoCols"
3530 /*@C
3531     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
3532     of the OFF-DIAGONAL portion of local A
3533 
3534     Collective on Mat
3535 
3536    Input Parameters:
3537 +    A,B - the matrices in mpiaij format
3538 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3539 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3540 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3541 
3542    Output Parameter:
3543 .    B_oth - the sequential matrix generated
3544 
3545     Level: developer
3546 
3547 @*/
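/*
   A minimal usage sketch (illustrative names); startsj and bufa are work arrays created on the
   first call so that a MAT_REUSE_MATRIX call only needs to communicate new numerical values:

      PetscInt    *startsj;
      PetscScalar *bufa;
      Mat         B_oth;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
      ierr = PetscFree(startsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
*/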
3548 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3549 {
3550   VecScatter_MPI_General *gen_to,*gen_from;
3551   PetscErrorCode         ierr;
3552   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3553   Mat_SeqAIJ             *b_oth;
3554   VecScatter             ctx=a->Mvctx;
3555   MPI_Comm               comm=ctx->comm;
3556   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3557   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3558   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3559   PetscInt               i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj,*sstarts,*sstartsj,len;
3560   MPI_Request            *rwaits,*swaits;
3561   MPI_Status             *sstatus,rstatus;
3562   PetscInt               *cols;
3563   PetscScalar            *vals;
3564   PetscMPIInt            j;
3565 
3566   PetscFunctionBegin;
3567   if (a->cstart != b->rstart || a->cend != b->rend){
3568     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3569   }
3570   if (!logkey_GetBrowsOfAocols) {
3571     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3572   }
3573   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3574   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3575 
3576   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3577   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3578   rvalues  = gen_from->values; /* used here as the buffer for the incoming row lengths */
3579   svalues  = gen_to->values;   /* used here as the buffer for the outgoing row lengths */
3580   nrecvs   = gen_from->n;
3581   nsends   = gen_to->n;
3582   rwaits   = gen_from->requests;
3583   swaits   = gen_to->requests;
3584   rrow     = gen_from->indices; /* local row index to be received */
3585   srow     = gen_to->indices;   /* local row index to be sent */
3586   rstarts  = gen_from->starts;
3587   sstarts  = gen_to->starts;
3588   rprocs   = gen_from->procs;
3589   sprocs   = gen_to->procs;
3590   sstatus  = gen_to->sstatus;
3591 
3592   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3593   if (scall == MAT_INITIAL_MATRIX){
3594     /* i-array */
3595     /*---------*/
3596     /*  post receives */
3597     for (i=0; i<nrecvs; i++){
3598       rowlen = (PetscInt*)rvalues + rstarts[i];
3599       nrows = rstarts[i+1]-rstarts[i];
3600       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3601     }
3602 
3603     /* pack the outgoing message */
3604     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3605     rstartsj = sstartsj + nsends +1;
3606     sstartsj[0] = 0;  rstartsj[0] = 0;
3607     len = 0; /* total length of j or a array to be sent */
3608     k = 0;
3609     for (i=0; i<nsends; i++){
3610       rowlen = (PetscInt*)svalues + sstarts[i];
3611       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3612       for (j=0; j<nrows; j++) {
3613         row = srow[k] + b->rowners[rank]; /* global row idx */
3614         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3615         len += rowlen[j];
3616         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3617         k++;
3618       }
3619       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3620       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3621     }
3622     /* recvs and sends of i-array are completed */
3623     i = nrecvs;
3624     while (i--) {
3625       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3626     }
3627     if (nsends) {
3628       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3629     }
3630     /* allocate buffers for sending j and a arrays */
3631     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3632     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3633 
3634     /* create i-array of B_oth */
3635     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3636     b_othi[0] = 0;
3637     len = 0; /* total length of j or a array to be received */
3638     k = 0;
3639     for (i=0; i<nrecvs; i++){
3640       rowlen = (PetscInt*)rvalues + rstarts[i];
3641       nrows = rstarts[i+1]-rstarts[i];
3642       for (j=0; j<nrows; j++) {
3643         b_othi[k+1] = b_othi[k] + rowlen[j];
3644         len += rowlen[j]; k++;
3645       }
3646       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3647     }
3648 
3649     /* allocate space for j and a arrays of B_oth */
3650     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3651     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3652 
3653     /* j-array */
3654     /*---------*/
3655     /*  post receives of j-array */
3656     for (i=0; i<nrecvs; i++){
3657       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3658       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3659     }
3660     k = 0;
3661     for (i=0; i<nsends; i++){
3662       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3663       bufJ = bufj+sstartsj[i];
3664       for (j=0; j<nrows; j++) {
3665         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3666         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3667         for (l=0; l<ncols; l++){
3668           *bufJ++ = cols[l];
3669         }
3670         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3671       }
3672       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3673     }
3674 
3675     /* recvs and sends of j-array are completed */
3676     i = nrecvs;
3677     while (i--) {
3678       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3679     }
3680     if (nsends) {
3681       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3682     }
3683   } else if (scall == MAT_REUSE_MATRIX){
3684     sstartsj = *startsj;
3685     rstartsj = sstartsj + nsends +1;
3686     bufa     = *bufa_ptr;
3687     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3688     b_otha   = b_oth->a;
3689   } else {
3690     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3691   }
3692 
3693   /* a-array */
3694   /*---------*/
3695   /*  post receives of a-array */
3696   for (i=0; i<nrecvs; i++){
3697     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3698     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3699   }
3700   k = 0;
3701   for (i=0; i<nsends; i++){
3702     nrows = sstarts[i+1]-sstarts[i];
3703     bufA = bufa+sstartsj[i];
3704     for (j=0; j<nrows; j++) {
3705       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3706       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3707       for (l=0; l<ncols; l++){
3708         *bufA++ = vals[l];
3709       }
3710       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3711 
3712     }
3713     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3714   }
3715   /* recvs and sends of a-array are completed */
3716   i = nrecvs;
3717   while (i--) {
3718     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3719   }
3720   if (nsends) {
3721     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3722   }
3723 
3724   if (scall == MAT_INITIAL_MATRIX){
3725     /* put together the new matrix */
3726     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3727 
3728     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3729     /* Since these are PETSc arrays, change flags to free them as necessary. */
3730     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3731     b_oth->freedata = PETSC_TRUE;
3732     b_oth->nonew    = 0;
3733 
3734     ierr = PetscFree(bufj);CHKERRQ(ierr);
3735     if (!startsj || !bufa_ptr){
3736       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3737       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the value buffer itself, not the caller's pointer */
3738     } else {
3739       *startsj  = sstartsj;
3740       *bufa_ptr = bufa;
3741     }
3742   }
3743   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3744 
3745   PetscFunctionReturn(0);
3746 }
3747 
3748 /*MC
3749    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3750 
3751    Options Database Keys:
3752 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3753 
3754   Level: beginner
3755 
3756 .seealso: MatCreateMPIAIJ
3757 M*/
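/*
   A minimal sketch of selecting this type at run time (M and N are assumed global dimensions;
   the preallocation numbers are only an example):

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);   /* picks up -mat_type mpiaij */
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/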
3758 
3759 EXTERN_C_BEGIN
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatCreate_MPIAIJ"
3762 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3763 {
3764   Mat_MPIAIJ     *b;
3765   PetscErrorCode ierr;
3766   PetscInt       i;
3767   PetscMPIInt    size;
3768 
3769   PetscFunctionBegin;
3770   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3771 
3772   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3773   B->data         = (void*)b;
3774   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3775   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3776   B->factor       = 0;
3777   B->assembled    = PETSC_FALSE;
3778   B->mapping      = 0;
3779 
3780   B->insertmode      = NOT_SET_VALUES;
3781   b->size            = size;
3782   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3783 
3784   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3785   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3786 
3787   /* the information in the maps duplicates the information computed below, eventually
3788      we should remove the duplicate information that is not contained in the maps */
3789   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3790   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3791 
3792   /* build local table of row and column ownerships */
3793   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3794   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3795   b->cowners = b->rowners + b->size + 2;
3796   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3797   b->rowners[0] = 0;
3798   for (i=2; i<=b->size; i++) {
3799     b->rowners[i] += b->rowners[i-1];
3800   }
3801   b->rstart = b->rowners[b->rank];
3802   b->rend   = b->rowners[b->rank+1];
3803   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3804   b->cowners[0] = 0;
3805   for (i=2; i<=b->size; i++) {
3806     b->cowners[i] += b->cowners[i-1];
3807   }
3808   b->cstart = b->cowners[b->rank];
3809   b->cend   = b->cowners[b->rank+1];
3810 
3811   /* build cache for off array entries formed */
3812   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3813   b->donotstash  = PETSC_FALSE;
3814   b->colmap      = 0;
3815   b->garray      = 0;
3816   b->roworiented = PETSC_TRUE;
3817 
3818   /* stuff used for matrix vector multiply */
3819   b->lvec      = PETSC_NULL;
3820   b->Mvctx     = PETSC_NULL;
3821 
3822   /* stuff for MatGetRow() */
3823   b->rowindices   = 0;
3824   b->rowvalues    = 0;
3825   b->getrowactive = PETSC_FALSE;
3826 
3827   /* Explicitly create 2 MATSEQAIJ matrices. */
3828   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3829   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3830   PetscLogObjectParent(B,b->A);
3831   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3832   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3833   PetscLogObjectParent(B,b->B);
3834 
3835   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3836                                      "MatStoreValues_MPIAIJ",
3837                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3838   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3839                                      "MatRetrieveValues_MPIAIJ",
3840                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3841   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3842 				     "MatGetDiagonalBlock_MPIAIJ",
3843                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3844   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3845 				     "MatIsTranspose_MPIAIJ",
3846 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3847   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3848 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3849 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3850   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3851 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3852 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3853   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3854 				     "MatDiagonalScaleLocal_MPIAIJ",
3855 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3856   PetscFunctionReturn(0);
3857 }
3858 EXTERN_C_END
3859