xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision dcf5cc722893849f9d6a49faec7bb9e7955fb956)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7   number to the local column number in the off-diagonal part of the local
8   storage of the matrix.  When PETSC_USE_CTABLE is defined this mapping is
9   scalable, at a slightly higher hash-table lookup cost; without it the mapping
10   is not scalable (each processor stores an order-N integer array) but access is fast.
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
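
/*
   Illustrative sketch (not used by the library): once the map built above exists, a
   global column number gcol known to lie in the off-diagonal part is translated to
   its local column number lcol as follows; lcol comes out as -1 when gcol is not
   present.  This mirrors the lookups done in MatSetValues_MPIAIJ() and
   MatGetValues_MPIAIJ() below.

     #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif
*/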
34 
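/*
   The two macros below implement the inner loop of MatSetValues for the "diagonal"
   (A) and "off-diagonal" (B) sequential blocks.  Each locates the column within the
   row (a short bisection followed by a linear scan) and then either updates the
   existing entry or inserts a new one, shifting the later entries of the row up by
   one slot.  If the row has no free slot left, the whole block is reallocated with
   room for CHUNKSIZE additional nonzeros in that row, the row pointers of all later
   rows are shifted by CHUNKSIZE, and the maxnz/reallocs counters are updated.
*/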
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (PetscInt*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (PetscInt*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
183 
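/*
   MatSetValues for the parallel AIJ format routes each (row,column) pair as follows:
   rows outside the local ownership range [rstart,rend) are stashed and communicated
   to their owners during assembly (unless MAT_IGNORE_OFF_PROC_ENTRIES was set);
   columns inside [cstart,cend) go into the sequential "diagonal" block A with local
   column in[j]-cstart; all other columns go into the "off-diagonal" block B, where
   before the first assembly the global column number is stored directly and after
   an assembly it is translated to a local index via the colmap built by
   CreateColmap_MPIAIJ_Private().
*/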
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar    value;
190   PetscErrorCode ierr;
191   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
192   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth     roworiented = aij->roworiented;
194 
195   /* Some Variables required in the macro */
196   Mat            A = aij->A;
197   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
198   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar    *aa = a->a;
200   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat            B = aij->B;
202   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
203   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar    *ba = b->a;
205 
206   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   PetscInt       nonew = a->nonew,shift=0;
208   PetscScalar    *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
278   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   PetscInt       nstash,reallocs;
322   InsertMode     addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 #undef __FUNCT__
343 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
344 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
345 {
346   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
347   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
348   PetscErrorCode ierr;
349   PetscMPIInt    n;
350   PetscInt       i,j,rstart,ncols,flg;
351   PetscInt       *row,*col,other_disassembled;
352   PetscScalar    *val;
353   InsertMode     addv = mat->insertmode;
354 
355   PetscFunctionBegin;
356   if (!aij->donotstash) {
357     while (1) {
358       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
359       if (!flg) break;
360 
361       for (i=0; i<n;) {
362         /* Now identify the consecutive vals belonging to the same row */
363         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
364         if (j < n) ncols = j-i;
365         else       ncols = n-i;
366         /* Now assemble all these values with a single function call */
367         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
368         i = j;
369       }
370     }
371     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
372   }
373 
374   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
375   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
376 
377   /* determine if any processor has disassembled; if so, we must
378      also disassemble ourselves, so that we may reassemble. */
379   /*
380      if the nonzero structure of submatrix B cannot change then we know that
381      no processor disassembled, so we can skip this check
382   */
383   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
384     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
385     if (mat->was_assembled && !other_disassembled) {
386       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
387       /* reaccess the b because aij->B was changed */
388       b    = (Mat_SeqAIJ *)aij->B->data;
389     }
390   }
391 
392   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
393     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
394   }
395 
396   b->inode.use             = PETSC_FALSE;
397   b->compressedrow.use     = PETSC_TRUE;
398   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
399   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
400 
401   if (aij->rowvalues) {
402     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
403     aij->rowvalues = 0;
404   }
405 
406   /* used by MatAXPY() */
407   a->xtoy = 0; b->xtoy = 0;
408   a->XtoY = 0; b->XtoY = 0;
409 
410   PetscFunctionReturn(0);
411 }
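
/*
   Illustrative sketch only (the variable names are hypothetical, this code is not
   part of the library): the assembly cycle as seen from application code.  Values
   destined for rows owned by other processes are stashed by MatSetValues_MPIAIJ()
   and routed to their owners by the MatAssemblyBegin/End pair above.

     for (row=rstart; row<rend; row++) {
       ierr = MatSetValues(mat,1,&row,1,&row,&one,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/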
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
415 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
416 {
417   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
418   PetscErrorCode ierr;
419 
420   PetscFunctionBegin;
421   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
422   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatZeroRows_MPIAIJ"
428 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
429 {
430   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
431   PetscErrorCode ierr;
432   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
433   PetscInt       i,N,*rows,*owners = l->rowners;
434   PetscInt       *nprocs,j,idx,nsends,row;
435   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
436   PetscInt       *rvalues,count,base,slen,*source;
437   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
438   MPI_Comm       comm = A->comm;
439   MPI_Request    *send_waits,*recv_waits;
440   MPI_Status     recv_status,*send_status;
441   IS             istmp;
442   PetscTruth     found;
443 
444   PetscFunctionBegin;
445   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
446   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
447 
448   /*  first count number of contributors to each processor */
449   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
450   ierr   = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
451   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
452   for (i=0; i<N; i++) {
453     idx = rows[i];
454     found = PETSC_FALSE;
455     for (j=0; j<size; j++) {
456       if (idx >= owners[j] && idx < owners[j+1]) {
457         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
458       }
459     }
460     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
461   }
462   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
463 
464   /* inform other processors of number of messages and max length*/
465   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
466 
467   /* post receives:   */
468   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
469   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
470   for (i=0; i<nrecvs; i++) {
471     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
472   }
473 
474   /* do sends:
475       1) starts[i] gives the starting index in svalues for stuff going to
476          the ith processor
477   */
478   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
479   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
480   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
481   starts[0] = 0;
482   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
483   for (i=0; i<N; i++) {
484     svalues[starts[owner[i]]++] = rows[i];
485   }
486   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
487 
488   starts[0] = 0;
489   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
490   count = 0;
491   for (i=0; i<size; i++) {
492     if (nprocs[2*i+1]) {
493       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
494     }
495   }
496   ierr = PetscFree(starts);CHKERRQ(ierr);
497 
498   base = owners[rank];
499 
500   /*  wait on receives */
501   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
502   source = lens + nrecvs;
503   count  = nrecvs; slen = 0;
504   while (count) {
505     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
506     /* unpack receives into our local space */
507     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
508     source[imdex]  = recv_status.MPI_SOURCE;
509     lens[imdex]    = n;
510     slen          += n;
511     count--;
512   }
513   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
514 
515   /* move the data into the send scatter */
516   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
517   count = 0;
518   for (i=0; i<nrecvs; i++) {
519     values = rvalues + i*nmax;
520     for (j=0; j<lens[i]; j++) {
521       lrows[count++] = values[j] - base;
522     }
523   }
524   ierr = PetscFree(rvalues);CHKERRQ(ierr);
525   ierr = PetscFree(lens);CHKERRQ(ierr);
526   ierr = PetscFree(owner);CHKERRQ(ierr);
527   ierr = PetscFree(nprocs);CHKERRQ(ierr);
528 
529   /* actually zap the local rows */
530   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
531   PetscLogObjectParent(A,istmp);
532 
533   /*
534         Zero the required rows. If the "diagonal block" of the matrix
535      is square and the user wishes to set the diagonal we use separate
536      code so that MatSetValues() is not called for each diagonal entry,
537      which would allocate new memory with many mallocs and slow things down.
538 
539        Contributed by: Mathew Knepley
540   */
541   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
542   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
543   if (diag && (l->A->M == l->A->N)) {
544     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
545   } else if (diag) {
546     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
547     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
548       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
549 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
550     }
551     for (i = 0; i < slen; i++) {
552       row  = lrows[i] + rstart;
553       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
554     }
555     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
556     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
557   } else {
558     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
559   }
560   ierr = ISDestroy(istmp);CHKERRQ(ierr);
561   ierr = PetscFree(lrows);CHKERRQ(ierr);
562 
563   /* wait on sends */
564   if (nsends) {
565     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
566     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
567     ierr = PetscFree(send_status);CHKERRQ(ierr);
568   }
569   ierr = PetscFree(send_waits);CHKERRQ(ierr);
570   ierr = PetscFree(svalues);CHKERRQ(ierr);
571 
572   PetscFunctionReturn(0);
573 }
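
/*
   Illustrative sketch only (hypothetical variable names, not part of the library):
   zeroing a set of global rows and placing 1.0 on the diagonal from application
   code.  The rows need not be locally owned; they are routed to their owners above.

     PetscScalar one = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,nrows,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/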
574 
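/*
   The parallel matrix-vector product is formed as y = A*x_local + B*x_ghost, where
   A is the local "diagonal" block and B the "off-diagonal" block.  The scatter that
   gathers the needed ghost values of xx into a->lvec is started before the local
   product so that the communication overlaps with the computation on A.
*/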
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMult_MPIAIJ"
577 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscInt       nt;
582 
583   PetscFunctionBegin;
584   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
585   if (nt != A->n) {
586     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
587   }
588   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
589   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
590   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
591   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
592   PetscFunctionReturn(0);
593 }
594 
595 #undef __FUNCT__
596 #define __FUNCT__ "MatMultAdd_MPIAIJ"
597 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
598 {
599   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
600   PetscErrorCode ierr;
601 
602   PetscFunctionBegin;
603   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
604   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
605   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
606   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
607   PetscFunctionReturn(0);
608 }
609 
610 #undef __FUNCT__
611 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
612 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
613 {
614   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
615   PetscErrorCode ierr;
616 
617   PetscFunctionBegin;
618   /* do nondiagonal part */
619   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
620   /* send it on its way */
621   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
622   /* do local part */
623   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
624   /* receive remote parts: note this assumes the values are not actually */
625   /* inserted into yy until the VecScatterEnd() below, which is true for the */
626   /* current implementation but may not always be the case. */
627   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
628   PetscFunctionReturn(0);
629 }
630 
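/*
   Tests whether Bmat is the transpose of Amat: first the local diagonal blocks are
   compared (cheap); only if those match is the coupling to the other processes
   extracted with MatGetSubMatrices() and A(me,notme) compared against B(notme,me).
*/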
631 EXTERN_C_BEGIN
632 #undef __FUNCT__
633 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
634 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
635 {
636   MPI_Comm       comm;
637   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
638   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
639   IS             Me,Notme;
640   PetscErrorCode ierr;
641   PetscInt       M,N,first,last,*notme,i;
642   PetscMPIInt    size;
643 
644   PetscFunctionBegin;
645 
646   /* Easy test: symmetric diagonal block */
647   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
648   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
649   if (!*f) PetscFunctionReturn(0);
650   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
651   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
652   if (size == 1) PetscFunctionReturn(0);
653 
654   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
655   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
656   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
657   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
658   for (i=0; i<first; i++) notme[i] = i;
659   for (i=last; i<M; i++) notme[i-last+first] = i;
660   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
661   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
662   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
663   Aoff = Aoffs[0];
664   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
665   Boff = Boffs[0];
666   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
667   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
668   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
669   ierr = ISDestroy(Me);CHKERRQ(ierr);
670   ierr = ISDestroy(Notme);CHKERRQ(ierr);
671 
672   PetscFunctionReturn(0);
673 }
674 EXTERN_C_END
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
678 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
679 {
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681   PetscErrorCode ierr;
682 
683   PetscFunctionBegin;
684   /* do nondiagonal part */
685   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
686   /* send it on its way */
687   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
688   /* do local part */
689   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
690   /* receive remote parts: note this assumes the values are not actually */
691   /* inserted into zz until the VecScatterEnd() below, which is true for the */
692   /* current implementation but may not always be the case. */
693   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 /*
698   This only works correctly for square matrices where the subblock A->A is the
699    diagonal block
700 */
701 #undef __FUNCT__
702 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
703 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
704 {
705   PetscErrorCode ierr;
706   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
707 
708   PetscFunctionBegin;
709   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
710   if (a->rstart != a->cstart || a->rend != a->cend) {
711     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
712   }
713   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
714   PetscFunctionReturn(0);
715 }
716 
717 #undef __FUNCT__
718 #define __FUNCT__ "MatScale_MPIAIJ"
719 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
720 {
721   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
722   PetscErrorCode ierr;
723 
724   PetscFunctionBegin;
725   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
726   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatDestroy_MPIAIJ"
732 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
733 {
734   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738 #if defined(PETSC_USE_LOG)
739   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
740 #endif
741   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
742   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
743   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
744   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
745 #if defined (PETSC_USE_CTABLE)
746   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
747 #else
748   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
749 #endif
750   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
751   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
752   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
753   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
754   ierr = PetscFree(aij);CHKERRQ(ierr);
755 
756   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
757   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
758   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
759   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
760   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
761   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
762   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
763   PetscFunctionReturn(0);
764 }
765 
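/*
   Writes the matrix in the PETSc binary format: a header (MAT_FILE_COOKIE, M, N,
   global number of nonzeros), then all row lengths, then all column indices, then
   all numerical values, gathered row by row onto process 0.  Within each row the
   off-diagonal entries with global column below cstart come first, then the
   diagonal block entries, then the remaining off-diagonal entries, so the columns
   appear in increasing global order.
*/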
766 #undef __FUNCT__
767 #define __FUNCT__ "MatView_MPIAIJ_Binary"
768 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
769 {
770   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
771   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
772   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
773   PetscErrorCode    ierr;
774   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
775   int               fd;
776   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
777   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
778   PetscScalar       *column_values;
779 
780   PetscFunctionBegin;
781   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
782   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
783   nz   = A->nz + B->nz;
784   if (!rank) {
785     header[0] = MAT_FILE_COOKIE;
786     header[1] = mat->M;
787     header[2] = mat->N;
788     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
789     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
790     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
791     /* get largest number of rows any processor has */
792     rlen = mat->m;
793     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
794     for (i=1; i<size; i++) {
795       rlen = PetscMax(rlen,range[i+1] - range[i]);
796     }
797   } else {
798     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
799     rlen = mat->m;
800   }
801 
802   /* load up the local row counts */
803   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
804   for (i=0; i<mat->m; i++) {
805     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
806   }
807 
808   /* store the row lengths to the file */
809   if (!rank) {
810     MPI_Status status;
811     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
812     for (i=1; i<size; i++) {
813       rlen = range[i+1] - range[i];
814       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
815       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
816     }
817   } else {
818     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
819   }
820   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
821 
822   /* load up the local column indices */
823   nzmax = nz; /* the 0th processor needs as much space as the largest amount any processor needs */
824   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
825   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
826   cnt  = 0;
827   for (i=0; i<mat->m; i++) {
828     for (j=B->i[i]; j<B->i[i+1]; j++) {
829       if ( (col = garray[B->j[j]]) > cstart) break;
830       column_indices[cnt++] = col;
831     }
832     for (k=A->i[i]; k<A->i[i+1]; k++) {
833       column_indices[cnt++] = A->j[k] + cstart;
834     }
835     for (; j<B->i[i+1]; j++) {
836       column_indices[cnt++] = garray[B->j[j]];
837     }
838   }
839   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
840 
841   /* store the column indices to the file */
842   if (!rank) {
843     MPI_Status status;
844     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
845     for (i=1; i<size; i++) {
846       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
847       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
848       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
849       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
850     }
851   } else {
852     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
853     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
854   }
855   ierr = PetscFree(column_indices);CHKERRQ(ierr);
856 
857   /* load up the local column values */
858   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
859   cnt  = 0;
860   for (i=0; i<mat->m; i++) {
861     for (j=B->i[i]; j<B->i[i+1]; j++) {
862       if ( garray[B->j[j]] > cstart) break;
863       column_values[cnt++] = B->a[j];
864     }
865     for (k=A->i[i]; k<A->i[i+1]; k++) {
866       column_values[cnt++] = A->a[k];
867     }
868     for (; j<B->i[i+1]; j++) {
869       column_values[cnt++] = B->a[j];
870     }
871   }
872   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
873 
874   /* store the column values to the file */
875   if (!rank) {
876     MPI_Status status;
877     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
878     for (i=1; i<size; i++) {
879       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
880       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
881       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
882       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
883     }
884   } else {
885     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
886     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
887   }
888   ierr = PetscFree(column_values);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 #undef __FUNCT__
893 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
894 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
895 {
896   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
897   PetscErrorCode    ierr;
898   PetscMPIInt       rank = aij->rank,size = aij->size;
899   PetscTruth        isdraw,iascii,flg,isbinary;
900   PetscViewer       sviewer;
901   PetscViewerFormat format;
902 
903   PetscFunctionBegin;
904   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
905   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
906   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
907   if (iascii) {
908     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
909     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
910       MatInfo info;
911       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
912       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
913       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
914       if (flg) {
915         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
916 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
917       } else {
918         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
919 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
920       }
921       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
922       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
923       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
924       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
925       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
926       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
927       PetscFunctionReturn(0);
928     } else if (format == PETSC_VIEWER_ASCII_INFO) {
929       PetscFunctionReturn(0);
930     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
931       PetscFunctionReturn(0);
932     }
933   } else if (isbinary) {
934     if (size == 1) {
935       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
936       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
937     } else {
938       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
939     }
940     PetscFunctionReturn(0);
941   } else if (isdraw) {
942     PetscDraw  draw;
943     PetscTruth isnull;
944     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
945     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
946   }
947 
948   if (size == 1) {
949     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
950     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
951   } else {
952     /* assemble the entire matrix onto the first processor */
953     Mat         A;
954     Mat_SeqAIJ  *Aloc;
955     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
956     PetscScalar *a;
957 
958     if (!rank) {
959       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
960     } else {
961       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
962     }
963     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
964     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
965     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
966     PetscLogObjectParent(mat,A);
967 
968     /* copy over the A part */
969     Aloc = (Mat_SeqAIJ*)aij->A->data;
970     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
971     row = aij->rstart;
972     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
973     for (i=0; i<m; i++) {
974       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
975       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
976     }
977     aj = Aloc->j;
978     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
979 
980     /* copy over the B part */
981     Aloc = (Mat_SeqAIJ*)aij->B->data;
982     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
983     row  = aij->rstart;
984     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
985     ct   = cols;
986     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
987     for (i=0; i<m; i++) {
988       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
989       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
990     }
991     ierr = PetscFree(ct);CHKERRQ(ierr);
992     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
993     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
994     /*
995        Everyone has to call to draw the matrix since the graphics waits are
996        synchronized across all processors that share the PetscDraw object
997     */
998     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
999     if (!rank) {
1000       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1001       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1002     }
1003     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1004     ierr = MatDestroy(A);CHKERRQ(ierr);
1005   }
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatView_MPIAIJ"
1011 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1012 {
1013   PetscErrorCode ierr;
1014   PetscTruth     iascii,isdraw,issocket,isbinary;
1015 
1016   PetscFunctionBegin;
1017   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1018   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1019   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1020   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1021   if (iascii || isdraw || isbinary || issocket) {
1022     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1023   } else {
1024     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1025   }
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 
1030 
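/*
   Processor-local SOR/relaxation: each outer iteration gathers the ghost values of
   xx into mat->lvec, forms the modified right-hand side bb1 = bb - B*x_ghost, and
   then applies the requested sequential sweep on the diagonal block, i.e.
   xx <- SOR(A,bb1,omega).  A true global (parallel) SOR is not supported.
*/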
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatRelax_MPIAIJ"
1033 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1034 {
1035   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1036   PetscErrorCode ierr;
1037   Vec            bb1;
1038   PetscScalar    mone=-1.0;
1039 
1040   PetscFunctionBegin;
1041   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1042 
1043   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1044 
1045   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050 
1051     while (its--) {
1052       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1054 
1055       /* update rhs: bb1 = bb - B*x */
1056       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1057       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1058 
1059       /* local sweep */
1060       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1061       CHKERRQ(ierr);
1062     }
1063   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1064     if (flag & SOR_ZERO_INITIAL_GUESS) {
1065       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1066       its--;
1067     }
1068     while (its--) {
1069       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1071 
1072       /* update rhs: bb1 = bb - B*x */
1073       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1074       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1075 
1076       /* local sweep */
1077       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1078       CHKERRQ(ierr);
1079     }
1080   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1081     if (flag & SOR_ZERO_INITIAL_GUESS) {
1082       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1083       its--;
1084     }
1085     while (its--) {
1086       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1087       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1088 
1089       /* update rhs: bb1 = bb - B*x */
1090       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1091       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1092 
1093       /* local sweep */
1094       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1095       CHKERRQ(ierr);
1096     }
1097   } else {
1098     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1099   }
1100 
1101   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1107 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1108 {
1109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1110   Mat            A = mat->A,B = mat->B;
1111   PetscErrorCode ierr;
1112   PetscReal      isend[5],irecv[5];
1113 
1114   PetscFunctionBegin;
1115   info->block_size     = 1.0;
1116   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1117   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1118   isend[3] = info->memory;  isend[4] = info->mallocs;
1119   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1120   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1121   isend[3] += info->memory;  isend[4] += info->mallocs;
1122   if (flag == MAT_LOCAL) {
1123     info->nz_used      = isend[0];
1124     info->nz_allocated = isend[1];
1125     info->nz_unneeded  = isend[2];
1126     info->memory       = isend[3];
1127     info->mallocs      = isend[4];
1128   } else if (flag == MAT_GLOBAL_MAX) {
1129     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1130     info->nz_used      = irecv[0];
1131     info->nz_allocated = irecv[1];
1132     info->nz_unneeded  = irecv[2];
1133     info->memory       = irecv[3];
1134     info->mallocs      = irecv[4];
1135   } else if (flag == MAT_GLOBAL_SUM) {
1136     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1137     info->nz_used      = irecv[0];
1138     info->nz_allocated = irecv[1];
1139     info->nz_unneeded  = irecv[2];
1140     info->memory       = irecv[3];
1141     info->mallocs      = irecv[4];
1142   }
1143   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1144   info->fill_ratio_needed = 0;
1145   info->factor_mallocs    = 0;
1146   info->rows_global       = (double)matin->M;
1147   info->columns_global    = (double)matin->N;
1148   info->rows_local        = (double)matin->m;
1149   info->columns_local     = (double)matin->n;
1150 
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatSetOption_MPIAIJ"
1156 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1157 {
1158   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1159   PetscErrorCode ierr;
1160 
1161   PetscFunctionBegin;
1162   switch (op) {
1163   case MAT_NO_NEW_NONZERO_LOCATIONS:
1164   case MAT_YES_NEW_NONZERO_LOCATIONS:
1165   case MAT_COLUMNS_UNSORTED:
1166   case MAT_COLUMNS_SORTED:
1167   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1168   case MAT_KEEP_ZEROED_ROWS:
1169   case MAT_NEW_NONZERO_LOCATION_ERR:
1170   case MAT_USE_INODES:
1171   case MAT_DO_NOT_USE_INODES:
1172   case MAT_IGNORE_ZERO_ENTRIES:
1173     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1174     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1175     break;
1176   case MAT_ROW_ORIENTED:
1177     a->roworiented = PETSC_TRUE;
1178     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1179     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1180     break;
1181   case MAT_ROWS_SORTED:
1182   case MAT_ROWS_UNSORTED:
1183   case MAT_YES_NEW_DIAGONALS:
1184     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1185     break;
1186   case MAT_COLUMN_ORIENTED:
1187     a->roworiented = PETSC_FALSE;
1188     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1189     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1190     break;
1191   case MAT_IGNORE_OFF_PROC_ENTRIES:
1192     a->donotstash = PETSC_TRUE;
1193     break;
1194   case MAT_NO_NEW_DIAGONALS:
1195     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1196   case MAT_SYMMETRIC:
1197   case MAT_STRUCTURALLY_SYMMETRIC:
1198   case MAT_HERMITIAN:
1199   case MAT_SYMMETRY_ETERNAL:
1200     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1201     break;
1202   case MAT_NOT_SYMMETRIC:
1203   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1204   case MAT_NOT_HERMITIAN:
1205   case MAT_NOT_SYMMETRY_ETERNAL:
1206     break;
1207   default:
1208     SETERRQ(PETSC_ERR_SUP,"unknown option");
1209   }
1210   PetscFunctionReturn(0);
1211 }
1212 
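/*
   MatGetRow for the parallel format merges one row of the diagonal block A and one
   row of the off-diagonal block B into a work array that is allocated once, sized
   for the longest local row: first the B entries with global column below cstart,
   then the A entries shifted by cstart, then the remaining B entries, so the
   returned columns are in increasing global order (assuming the rows of A and B
   are themselves sorted).  Only locally owned rows may be requested, and
   MatRestoreRow() must be called before the next MatGetRow().
*/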
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatGetRow_MPIAIJ"
1215 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1216 {
1217   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1218   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1219   PetscErrorCode ierr;
1220   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1221   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1222   PetscInt       *cmap,*idx_p;
1223 
1224   PetscFunctionBegin;
1225   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1226   mat->getrowactive = PETSC_TRUE;
1227 
1228   if (!mat->rowvalues && (idx || v)) {
1229     /*
1230         allocate enough space to hold information from the longest row.
1231     */
1232     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1233     PetscInt     max = 1,tmp;
1234     for (i=0; i<matin->m; i++) {
1235       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1236       if (max < tmp) { max = tmp; }
1237     }
1238     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1239     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1240   }
1241 
1242   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1243   lrow = row - rstart;
1244 
1245   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1246   if (!v)   {pvA = 0; pvB = 0;}
1247   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1248   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1249   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1250   nztot = nzA + nzB;
1251 
1252   cmap  = mat->garray;
1253   if (v  || idx) {
1254     if (nztot) {
1255       /* Sort by increasing column numbers, assuming A and B already sorted */
1256       PetscInt imark = -1;
1257       if (v) {
1258         *v = v_p = mat->rowvalues;
1259         for (i=0; i<nzB; i++) {
1260           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1261           else break;
1262         }
1263         imark = i;
1264         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1265         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1266       }
1267       if (idx) {
1268         *idx = idx_p = mat->rowindices;
1269         if (imark > -1) {
1270           for (i=0; i<imark; i++) {
1271             idx_p[i] = cmap[cworkB[i]];
1272           }
1273         } else {
1274           for (i=0; i<nzB; i++) {
1275             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1276             else break;
1277           }
1278           imark = i;
1279         }
1280         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1281         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1282       }
1283     } else {
1284       if (idx) *idx = 0;
1285       if (v)   *v   = 0;
1286     }
1287   }
1288   *nz = nztot;
1289   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1290   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 #undef __FUNCT__
1295 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1296 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1297 {
1298   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1299 
1300   PetscFunctionBegin;
1301   if (aij->getrowactive == PETSC_FALSE) {
1302     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1303   }
1304   aij->getrowactive = PETSC_FALSE;
1305   PetscFunctionReturn(0);
1306 }
1307 
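/*
   The parallel norms are assembled from the local A and B blocks:
     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2), local sums of squares combined with MPI_SUM;
     NORM_1:         max_j sum_i |a_ij|,    local column sums combined with MPI_SUM, then maximized;
     NORM_INFINITY:  max_i sum_j |a_ij|,    the largest local row sum combined with MPI_MAX.
   The two-norm is not supported.
*/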
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatNorm_MPIAIJ"
1310 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1311 {
1312   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1313   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscInt       i,j,cstart = aij->cstart;
1316   PetscReal      sum = 0.0;
1317   PetscScalar    *v;
1318 
1319   PetscFunctionBegin;
1320   if (aij->size == 1) {
1321     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1322   } else {
1323     if (type == NORM_FROBENIUS) {
1324       v = amat->a;
1325       for (i=0; i<amat->nz; i++) {
1326 #if defined(PETSC_USE_COMPLEX)
1327         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1328 #else
1329         sum += (*v)*(*v); v++;
1330 #endif
1331       }
1332       v = bmat->a;
1333       for (i=0; i<bmat->nz; i++) {
1334 #if defined(PETSC_USE_COMPLEX)
1335         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1336 #else
1337         sum += (*v)*(*v); v++;
1338 #endif
1339       }
1340       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1341       *norm = sqrt(*norm);
1342     } else if (type == NORM_1) { /* max column norm */
1343       PetscReal *tmp,*tmp2;
1344       PetscInt    *jj,*garray = aij->garray;
1345       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1346       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1347       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1348       *norm = 0.0;
1349       v = amat->a; jj = amat->j;
1350       for (j=0; j<amat->nz; j++) {
1351         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1352       }
1353       v = bmat->a; jj = bmat->j;
1354       for (j=0; j<bmat->nz; j++) {
1355         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1356       }
1357       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1358       for (j=0; j<mat->N; j++) {
1359         if (tmp2[j] > *norm) *norm = tmp2[j];
1360       }
1361       ierr = PetscFree(tmp);CHKERRQ(ierr);
1362       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1363     } else if (type == NORM_INFINITY) { /* max row norm */
1364       PetscReal ntemp = 0.0;
1365       for (j=0; j<aij->A->m; j++) {
1366         v = amat->a + amat->i[j];
1367         sum = 0.0;
1368         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1369           sum += PetscAbsScalar(*v); v++;
1370         }
1371         v = bmat->a + bmat->i[j];
1372         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1373           sum += PetscAbsScalar(*v); v++;
1374         }
1375         if (sum > ntemp) ntemp = sum;
1376       }
1377       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1378     } else {
1379       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1380     }
1381   }
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatTranspose_MPIAIJ"
1387 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1388 {
1389   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1390   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1391   PetscErrorCode ierr;
1392   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1393   Mat            B;
1394   PetscScalar    *array;
1395 
1396   PetscFunctionBegin;
1397   if (!matout && M != N) {
1398     SETERRQ(PETSC_ERR_ARG_SIZ,"In-place transpose requires a square matrix");
1399   }
1400 
1401   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1402   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1403   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1404 
1405   /* copy over the A part */
1406   Aloc = (Mat_SeqAIJ*)a->A->data;
1407   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1408   row = a->rstart;
1409   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1410   for (i=0; i<m; i++) {
1411     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1412     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1413   }
1414   aj = Aloc->j;
1415   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1416 
1417   /* copy over the B part */
1418   Aloc = (Mat_SeqAIJ*)a->B->data;
1419   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1420   row  = a->rstart;
1421   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1422   ct   = cols;
1423   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1424   for (i=0; i<m; i++) {
1425     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1426     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1427   }
1428   ierr = PetscFree(ct);CHKERRQ(ierr);
1429   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1430   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1431   if (matout) {
1432     *matout = B;
1433   } else {
1434     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1435   }
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1441 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1442 {
1443   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1444   Mat            a = aij->A,b = aij->B;
1445   PetscErrorCode ierr;
1446   PetscInt       s1,s2,s3;
1447 
1448   PetscFunctionBegin;
1449   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1450   if (rr) {
1451     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1452     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1453     /* Overlap communication with computation. */
1454     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1455   }
1456   if (ll) {
1457     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1458     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1459     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1460   }
1461   /* scale  the diagonal block */
1462   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1463 
1464   if (rr) {
1465     /* Do a scatter end and then right scale the off-diagonal block */
1466     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1467     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1468   }
1469 
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1476 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1477 {
1478   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1479   PetscErrorCode ierr;
1480 
1481   PetscFunctionBegin;
1482   if (!a->rank) {
1483     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1484   }
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1490 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1491 {
1492   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1493   PetscErrorCode ierr;
1494 
1495   PetscFunctionBegin;
1496   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1497   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1502 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1503 {
1504   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1505   PetscErrorCode ierr;
1506 
1507   PetscFunctionBegin;
1508   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatEqual_MPIAIJ"
1514 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1515 {
1516   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1517   Mat            a,b,c,d;
1518   PetscTruth     flg;
1519   PetscErrorCode ierr;
1520 
1521   PetscFunctionBegin;
1522   a = matA->A; b = matA->B;
1523   c = matB->A; d = matB->B;
1524 
1525   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1526   if (flg == PETSC_TRUE) {
1527     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1528   }
1529   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatCopy_MPIAIJ"
1535 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1536 {
1537   PetscErrorCode ierr;
1538   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1539   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1540 
1541   PetscFunctionBegin;
1542   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1543   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1544     /* Because of the column compression in the off-processor part of the matrix a->B,
1545        the number of columns in a->B and b->B may differ, hence we cannot call
1546        MatCopy() directly on the two parts. If need be, a copy more efficient than
1547        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1548        and then copying the submatrices */
1549     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1550   } else {
1551     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1552     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1553   }
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1559 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1560 {
1561   PetscErrorCode ierr;
1562 
1563   PetscFunctionBegin;
1564   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #include "petscblaslapack.h"
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatAXPY_MPIAIJ"
1571 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1572 {
1573   PetscErrorCode ierr;
1574   PetscInt       i;
1575   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1576   PetscBLASInt   bnz,one=1;
1577   Mat_SeqAIJ     *x,*y;
1578 
1579   PetscFunctionBegin;
1580   if (str == SAME_NONZERO_PATTERN) {
1581     x = (Mat_SeqAIJ *)xx->A->data;
1582     y = (Mat_SeqAIJ *)yy->A->data;
1583     bnz = (PetscBLASInt)x->nz;
1584     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1585     x = (Mat_SeqAIJ *)xx->B->data;
1586     y = (Mat_SeqAIJ *)yy->B->data;
1587     bnz = (PetscBLASInt)x->nz;
1588     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1589   } else if (str == SUBSET_NONZERO_PATTERN) {
1590     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1591 
1592     x = (Mat_SeqAIJ *)xx->B->data;
1593     y = (Mat_SeqAIJ *)yy->B->data;
1594     if (y->xtoy && y->XtoY != xx->B) {
1595       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1596       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1597     }
1598     if (!y->xtoy) { /* get xtoy */
1599       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1600       y->XtoY = xx->B;
1601     }
1602     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1603   } else {
1604     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1605   }
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 /* -------------------------------------------------------------------*/
1610 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1611        MatGetRow_MPIAIJ,
1612        MatRestoreRow_MPIAIJ,
1613        MatMult_MPIAIJ,
1614 /* 4*/ MatMultAdd_MPIAIJ,
1615        MatMultTranspose_MPIAIJ,
1616        MatMultTransposeAdd_MPIAIJ,
1617        0,
1618        0,
1619        0,
1620 /*10*/ 0,
1621        0,
1622        0,
1623        MatRelax_MPIAIJ,
1624        MatTranspose_MPIAIJ,
1625 /*15*/ MatGetInfo_MPIAIJ,
1626        MatEqual_MPIAIJ,
1627        MatGetDiagonal_MPIAIJ,
1628        MatDiagonalScale_MPIAIJ,
1629        MatNorm_MPIAIJ,
1630 /*20*/ MatAssemblyBegin_MPIAIJ,
1631        MatAssemblyEnd_MPIAIJ,
1632        0,
1633        MatSetOption_MPIAIJ,
1634        MatZeroEntries_MPIAIJ,
1635 /*25*/ MatZeroRows_MPIAIJ,
1636        0,
1637        0,
1638        0,
1639        0,
1640 /*30*/ MatSetUpPreallocation_MPIAIJ,
1641        0,
1642        0,
1643        0,
1644        0,
1645 /*35*/ MatDuplicate_MPIAIJ,
1646        0,
1647        0,
1648        0,
1649        0,
1650 /*40*/ MatAXPY_MPIAIJ,
1651        MatGetSubMatrices_MPIAIJ,
1652        MatIncreaseOverlap_MPIAIJ,
1653        MatGetValues_MPIAIJ,
1654        MatCopy_MPIAIJ,
1655 /*45*/ MatPrintHelp_MPIAIJ,
1656        MatScale_MPIAIJ,
1657        0,
1658        0,
1659        0,
1660 /*50*/ MatSetBlockSize_MPIAIJ,
1661        0,
1662        0,
1663        0,
1664        0,
1665 /*55*/ MatFDColoringCreate_MPIAIJ,
1666        0,
1667        MatSetUnfactored_MPIAIJ,
1668        0,
1669        0,
1670 /*60*/ MatGetSubMatrix_MPIAIJ,
1671        MatDestroy_MPIAIJ,
1672        MatView_MPIAIJ,
1673        MatGetPetscMaps_Petsc,
1674        0,
1675 /*65*/ 0,
1676        0,
1677        0,
1678        0,
1679        0,
1680 /*70*/ 0,
1681        0,
1682        MatSetColoring_MPIAIJ,
1683 #if defined(PETSC_HAVE_ADIC)
1684        MatSetValuesAdic_MPIAIJ,
1685 #else
1686        0,
1687 #endif
1688        MatSetValuesAdifor_MPIAIJ,
1689 /*75*/ 0,
1690        0,
1691        0,
1692        0,
1693        0,
1694 /*80*/ 0,
1695        0,
1696        0,
1697        0,
1698 /*84*/ MatLoad_MPIAIJ,
1699        0,
1700        0,
1701        0,
1702        0,
1703        0,
1704 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1705        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1706        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1707        MatPtAP_MPIAIJ_MPIAIJ,
1708        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1709 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1710        0,
1711        0,
1712        0};
1713 
1714 /* ----------------------------------------------------------------------------------------*/
1715 
1716 EXTERN_C_BEGIN
1717 #undef __FUNCT__
1718 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1719 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1720 {
1721   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1722   PetscErrorCode ierr;
1723 
1724   PetscFunctionBegin;
1725   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1726   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1727   PetscFunctionReturn(0);
1728 }
1729 EXTERN_C_END
1730 
1731 EXTERN_C_BEGIN
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1734 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1735 {
1736   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1741   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1742   PetscFunctionReturn(0);
1743 }
1744 EXTERN_C_END
1745 
1746 #include "petscpc.h"
1747 EXTERN_C_BEGIN
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1750 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1751 {
1752   Mat_MPIAIJ     *b;
1753   PetscErrorCode ierr;
1754   PetscInt       i;
1755 
1756   PetscFunctionBegin;
1757   B->preallocated = PETSC_TRUE;
1758   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1759   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1760   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1761   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1762   if (d_nnz) {
1763     for (i=0; i<B->m; i++) {
1764       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1765     }
1766   }
1767   if (o_nnz) {
1768     for (i=0; i<B->m; i++) {
1769       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1770     }
1771   }
1772   b = (Mat_MPIAIJ*)B->data;
1773   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1774   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1775 
1776   PetscFunctionReturn(0);
1777 }
1778 EXTERN_C_END
1779 
1780 /*MC
1781    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1782 
1783    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1784    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1785    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
1786    for communicators controlling multiple processes.  It is recommended that you call both of
1787    the above preallocation routines for simplicity.
1788 
1789    Options Database Keys:
1790 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1791 
1792   Level: beginner
1793 
1794 .seealso: MatCreateMPIAIJ(), MATSEQAIJ, MATMPIAIJ
1795 M*/
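
/*
   A minimal usage sketch for the AIJ type (the sizes and the nonzero estimates
   below are illustrative placeholders, not taken from this file): create the
   matrix, let the type be selected at runtime, and call both preallocation
   routines as recommended above.

     Mat A;
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Only the preallocation call matching the actual matrix type takes effect;
   the other one is a harmless no-op.
*/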
1796 
1797 EXTERN_C_BEGIN
1798 #undef __FUNCT__
1799 #define __FUNCT__ "MatCreate_AIJ"
1800 PetscErrorCode MatCreate_AIJ(Mat A)
1801 {
1802   PetscErrorCode ierr;
1803   PetscMPIInt    size;
1804 
1805   PetscFunctionBegin;
1806   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1807   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1808   if (size == 1) {
1809     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1810   } else {
1811     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1812   }
1813   PetscFunctionReturn(0);
1814 }
1815 EXTERN_C_END
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1819 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1820 {
1821   Mat            mat;
1822   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1823   PetscErrorCode ierr;
1824 
1825   PetscFunctionBegin;
1826   *newmat       = 0;
1827   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1828   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1829   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1830   a    = (Mat_MPIAIJ*)mat->data;
1831 
1832   mat->factor       = matin->factor;
1833   mat->bs           = matin->bs;
1834   mat->assembled    = PETSC_TRUE;
1835   mat->insertmode   = NOT_SET_VALUES;
1836   mat->preallocated = PETSC_TRUE;
1837 
1838   a->rstart       = oldmat->rstart;
1839   a->rend         = oldmat->rend;
1840   a->cstart       = oldmat->cstart;
1841   a->cend         = oldmat->cend;
1842   a->size         = oldmat->size;
1843   a->rank         = oldmat->rank;
1844   a->donotstash   = oldmat->donotstash;
1845   a->roworiented  = oldmat->roworiented;
1846   a->rowindices   = 0;
1847   a->rowvalues    = 0;
1848   a->getrowactive = PETSC_FALSE;
1849 
1850   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1851   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1852   if (oldmat->colmap) {
1853 #if defined (PETSC_USE_CTABLE)
1854     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1855 #else
1856     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1857     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1858     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1859 #endif
1860   } else a->colmap = 0;
1861   if (oldmat->garray) {
1862     PetscInt len;
1863     len  = oldmat->B->n;
1864     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1865     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1866     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1867   } else a->garray = 0;
1868 
1869   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1870   PetscLogObjectParent(mat,a->lvec);
1871   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1872   PetscLogObjectParent(mat,a->Mvctx);
1873   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1874   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1875   PetscLogObjectParent(mat,a->A);
1876   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1877   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1878   PetscLogObjectParent(mat,a->B);
1879   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1880   *newmat = mat;
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 #include "petscsys.h"
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatLoad_MPIAIJ"
1888 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1889 {
1890   Mat            A;
1891   PetscScalar    *vals,*svals;
1892   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1893   MPI_Status     status;
1894   PetscErrorCode ierr;
1895   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1896   PetscInt       i,nz,j,rstart,rend;
1897   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1898   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1899   PetscInt       cend,cstart,n,*rowners;
1900   int            fd;
1901 
1902   PetscFunctionBegin;
1903   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1904   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1905   if (!rank) {
1906     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1907     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1908     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1909   }
1910 
1911   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1912   M = header[1]; N = header[2];
1913   /* determine ownership of all rows */
1914   m    = M/size + ((M % size) > rank);
1915   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1916   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1917   rowners[0] = 0;
1918   for (i=2; i<=size; i++) {
1919     rowners[i] += rowners[i-1];
1920   }
1921   rstart = rowners[rank];
1922   rend   = rowners[rank+1];
1923 
1924   /* distribute row lengths to all processors */
1925   ierr    = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
1926   if (!rank) {
1927     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1928     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1929     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1930     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1931     for (j=0; j<m; j++) {
1932       procsnz[0] += ourlens[j];
1933     }
1934     for (i=1; i<size; i++) {
1935       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1936       /* calculate the number of nonzeros on each processor */
1937       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1938         procsnz[i] += rowlengths[j];
1939       }
1940       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1941     }
1942     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1943   } else {
1944     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1945   }
1946 
1947   if (!rank) {
1948     /* determine max buffer needed and allocate it */
1949     maxnz = 0;
1950     for (i=0; i<size; i++) {
1951       maxnz = PetscMax(maxnz,procsnz[i]);
1952     }
1953     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1954 
1955     /* read in my part of the matrix column indices  */
1956     nz   = procsnz[0];
1957     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1958     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1959 
1960     /* read in everyone else's part and ship it off */
1961     for (i=1; i<size; i++) {
1962       nz   = procsnz[i];
1963       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1964       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1965     }
1966     ierr = PetscFree(cols);CHKERRQ(ierr);
1967   } else {
1968     /* determine buffer space needed for message */
1969     nz = 0;
1970     for (i=0; i<m; i++) {
1971       nz += ourlens[i];
1972     }
1973     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1974 
1975     /* receive message of column indices */
1976     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1977     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1978     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1979   }
1980 
1981   /* determine column ownership if matrix is not square */
1982   if (N != M) {
1983     n      = N/size + ((N % size) > rank);
1984     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1985     cstart = cend - n;
1986   } else {
1987     cstart = rstart;
1988     cend   = rend;
1989     n      = cend - cstart;
1990   }
1991 
1992   /* loop over local rows, determining number of off diagonal entries */
1993   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1994   jj = 0;
1995   for (i=0; i<m; i++) {
1996     for (j=0; j<ourlens[i]; j++) {
1997       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1998       jj++;
1999     }
2000   }
2001 
2002   /* create our matrix */
2003   for (i=0; i<m; i++) {
2004     ourlens[i] -= offlens[i];
2005   }
2006   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2007   ierr = MatSetType(A,type);CHKERRQ(ierr);
2008   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2009 
2010   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2011   for (i=0; i<m; i++) {
2012     ourlens[i] += offlens[i];
2013   }
2014 
2015   if (!rank) {
2016     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2017 
2018     /* read in my part of the matrix numerical values  */
2019     nz   = procsnz[0];
2020     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2021 
2022     /* insert into matrix */
2023     jj      = rstart;
2024     smycols = mycols;
2025     svals   = vals;
2026     for (i=0; i<m; i++) {
2027       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2028       smycols += ourlens[i];
2029       svals   += ourlens[i];
2030       jj++;
2031     }
2032 
2033     /* read in other processors and ship out */
2034     for (i=1; i<size; i++) {
2035       nz   = procsnz[i];
2036       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2037       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2038     }
2039     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2040   } else {
2041     /* receive numeric values */
2042     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2043 
2044     /* receive message of values */
2045     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2046     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2047     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2048 
2049     /* insert into matrix */
2050     jj      = rstart;
2051     smycols = mycols;
2052     svals   = vals;
2053     for (i=0; i<m; i++) {
2054       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2055       smycols += ourlens[i];
2056       svals   += ourlens[i];
2057       jj++;
2058     }
2059   }
2060   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2061   ierr = PetscFree(vals);CHKERRQ(ierr);
2062   ierr = PetscFree(mycols);CHKERRQ(ierr);
2063   ierr = PetscFree(rowners);CHKERRQ(ierr);
2064 
2065   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2066   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2067   *newmat = A;
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 #undef __FUNCT__
2072 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2073 /*
2074     Not great since it makes two copies of the submatrix: first a SeqAIJ
2075   copy of the local piece and then, by concatenating the local matrices,
2076   the end result. Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2077 */
2078 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2079 {
2080   PetscErrorCode ierr;
2081   PetscMPIInt    rank,size;
2082   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2083   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2084   Mat            *local,M,Mreuse;
2085   PetscScalar    *vwork,*aa;
2086   MPI_Comm       comm = mat->comm;
2087   Mat_SeqAIJ     *aij;
2088 
2089 
2090   PetscFunctionBegin;
2091   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2092   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2093 
2094   if (call ==  MAT_REUSE_MATRIX) {
2095     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2096     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2097     local = &Mreuse;
2098     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2099   } else {
2100     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2101     Mreuse = *local;
2102     ierr   = PetscFree(local);CHKERRQ(ierr);
2103   }
2104 
2105   /*
2106       m - number of local rows
2107       n - number of columns (same on all processors)
2108       rstart - first row in new global matrix generated
2109   */
2110   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2111   if (call == MAT_INITIAL_MATRIX) {
2112     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2113     ii  = aij->i;
2114     jj  = aij->j;
2115 
2116     /*
2117         Determine the number of non-zeros in the diagonal and off-diagonal
2118         portions of the matrix in order to do correct preallocation
2119     */
2120 
2121     /* first get start and end of "diagonal" columns */
2122     if (csize == PETSC_DECIDE) {
2123       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2124       if (mglobal == n) { /* square matrix */
2125         nlocal = m;
2126       } else {
2127         nlocal = n/size + ((n % size) > rank);
2128       }
2129     } else {
2130       nlocal = csize;
2131     }
2132     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2133     rstart = rend - nlocal;
2134     if (rank == size - 1 && rend != n) {
2135       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2136     }
2137 
2138     /* next, compute all the lengths */
2139     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2140     olens = dlens + m;
2141     for (i=0; i<m; i++) {
2142       jend = ii[i+1] - ii[i];
2143       olen = 0;
2144       dlen = 0;
2145       for (j=0; j<jend; j++) {
2146         if (*jj < rstart || *jj >= rend) olen++;
2147         else dlen++;
2148         jj++;
2149       }
2150       olens[i] = olen;
2151       dlens[i] = dlen;
2152     }
2153     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2154     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2155     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2156     ierr = PetscFree(dlens);CHKERRQ(ierr);
2157   } else {
2158     PetscInt ml,nl;
2159 
2160     M = *newmat;
2161     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2162     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2163     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2164     /*
2165          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2166        rather than the slower MatSetValues().
2167     */
2168     M->was_assembled = PETSC_TRUE;
2169     M->assembled     = PETSC_FALSE;
2170   }
2171   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2172   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2173   ii  = aij->i;
2174   jj  = aij->j;
2175   aa  = aij->a;
2176   for (i=0; i<m; i++) {
2177     row   = rstart + i;
2178     nz    = ii[i+1] - ii[i];
2179     cwork = jj;     jj += nz;
2180     vwork = aa;     aa += nz;
2181     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2182   }
2183 
2184   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2185   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2186   *newmat = M;
2187 
2188   /* save submatrix used in processor for next request */
2189   if (call ==  MAT_INITIAL_MATRIX) {
2190     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2191     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2192   }
2193 
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 EXTERN_C_BEGIN
2198 #undef __FUNCT__
2199 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2200 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2201 {
2202   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2203   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2204   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2205   const PetscInt *JJ;
2206   PetscScalar    *values;
2207   PetscErrorCode ierr;
2208 
2209   PetscFunctionBegin;
2210 #if defined(PETSC_OPT_g)
2211   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2212 #endif
2213   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2214   o_nnz = d_nnz + m;
2215 
2216   for (i=0; i<m; i++) {
2217     nnz     = I[i+1]- I[i];
2218     JJ      = J + I[i];
2219     nnz_max = PetscMax(nnz_max,nnz);
2220 #if defined(PETSC_OPT_g)
2221     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2222 #endif
2223     for (j=0; j<nnz; j++) {
2224       if (*JJ >= cstart) break;
2225       JJ++;
2226     }
2227     d = 0;
2228     for (; j<nnz; j++) {
2229       if (*JJ++ >= cend) break;
2230       d++;
2231     }
2232     d_nnz[i] = d;
2233     o_nnz[i] = nnz - d;
2234   }
2235   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2236   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2237 
2238   if (v) values = (PetscScalar*)v;
2239   else {
2240     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2241     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2242   }
2243 
2244   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2245   for (i=0; i<m; i++) {
2246     ii   = i + rstart;
2247     nnz  = I[i+1]- I[i];
2248     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2249   }
2250   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2251   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2252   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2253 
2254   if (!v) {
2255     ierr = PetscFree(values);CHKERRQ(ierr);
2256   }
2257   PetscFunctionReturn(0);
2258 }
2259 EXTERN_C_END
2260 
2261 #undef __FUNCT__
2262 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2263 /*@C
2264    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2265    (the default parallel PETSc format).
2266 
2267    Collective on MPI_Comm
2268 
2269    Input Parameters:
2270 +  A - the matrix
2271 .  i - the indices into j for the start of each local row (starts with zero)
2272 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2273 -  v - optional values in the matrix
2274 
2275    Level: developer
2276 
2277 .keywords: matrix, aij, compressed row, sparse, parallel
2278 
2279 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2280 @*/
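/*
   A minimal calling sketch (hypothetical data for two local rows; B is assumed
   to have been created with type MPIAIJ and its sizes already set; the column
   indices are sorted within each row as required):

     PetscInt    i[] = {0,2,4};
     PetscInt    j[] = {0,3,1,4};
     PetscScalar v[] = {1.0,2.0,3.0,4.0};
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);

   Passing v = PETSC_NULL preallocates and inserts the nonzero pattern with
   zero values (see the routine below).
*/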
2281 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2282 {
2283   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2284 
2285   PetscFunctionBegin;
2286   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2287   if (f) {
2288     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2289   }
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2295 /*@C
2296    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2297    (the default parallel PETSc format).  For good matrix assembly performance
2298    the user should preallocate the matrix storage by setting the parameters
2299    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2300    performance can be increased by more than a factor of 50.
2301 
2302    Collective on MPI_Comm
2303 
2304    Input Parameters:
2305 +  A - the matrix
2306 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2307            (same value is used for all local rows)
2308 .  d_nnz - array containing the number of nonzeros in the various rows of the
2309            DIAGONAL portion of the local submatrix (possibly different for each row)
2310            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2311            The size of this array is equal to the number of local rows, i.e 'm'.
2312            You must leave room for the diagonal entry even if it is zero.
2313 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2314            submatrix (same value is used for all local rows).
2315 -  o_nnz - array containing the number of nonzeros in the various rows of the
2316            OFF-DIAGONAL portion of the local submatrix (possibly different for
2317            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2318            structure. The size of this array is equal to the number
2319            of local rows, i.e 'm'.
2320 
2321    If the *_nnz parameter is given then the *_nz parameter is ignored
2322 
2323    The AIJ format (also called the Yale sparse matrix format or
2324    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2325    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2326 
2327    The parallel matrix is partitioned such that the first m0 rows belong to
2328    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2329    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
2330 
2331    The DIAGONAL portion of the local submatrix of a processor can be defined
2332    as the submatrix which is obtained by extracting the part corresponding
2333    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2334    first row that belongs to the processor, and r2 is the last row belonging
2335    to this processor. This is a square mxm matrix. The remaining portion
2336    of the local submatrix (which is mxN) constitutes the OFF-DIAGONAL portion.
2337 
2338    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2339 
2340    Example usage:
2341 
2342    Consider the following 8x8 matrix with 34 non-zero values, that is
2343    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2344    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2345    as follows:
2346 
2347 .vb
2348             1  2  0  |  0  3  0  |  0  4
2349     Proc0   0  5  6  |  7  0  0  |  8  0
2350             9  0 10  | 11  0  0  | 12  0
2351     -------------------------------------
2352            13  0 14  | 15 16 17  |  0  0
2353     Proc1   0 18  0  | 19 20 21  |  0  0
2354             0  0  0  | 22 23  0  | 24  0
2355     -------------------------------------
2356     Proc2  25 26 27  |  0  0 28  | 29  0
2357            30  0  0  | 31 32 33  |  0 34
2358 .ve
2359 
2360    This can be represented as a collection of submatrices as:
2361 
2362 .vb
2363       A B C
2364       D E F
2365       G H I
2366 .ve
2367 
2368    Where the submatrices A,B,C are owned by proc0, D,E,F are
2369    owned by proc1, G,H,I are owned by proc2.
2370 
2371    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2372    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2373    The 'M','N' parameters are 8,8, and have the same values on all procs.
2374 
2375    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2376    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2377    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2378    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2379    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2380    matrix, and [DF] as another SeqAIJ matrix.
2381 
2382    When d_nz, o_nz parameters are specified, d_nz storage elements are
2383    allocated for every row of the local diagonal submatrix, and o_nz
2384    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2385    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
2386    row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2387    In this case, the values of d_nz,o_nz are:
2388 .vb
2389      proc0 : d_nz = 2, o_nz = 2
2390      proc1 : d_nz = 3, o_nz = 2
2391      proc2 : d_nz = 1, o_nz = 4
2392 .ve
2393    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2394    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2395    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2396    34 values.
2397 
2398    When d_nnz, o_nnz parameters are specified, the storage is specified
2399    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2400    In the above case the values for d_nnz,o_nnz are:
2401 .vb
2402      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2403      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2404      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2405 .ve
2406    Here the space allocated is the sum of all the above values, i.e. 34, and
2407    hence pre-allocation is perfect.
2408 
2409    Level: intermediate
2410 
2411 .keywords: matrix, aij, compressed row, sparse, parallel
2412 
2413 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2414           MPIAIJ
2415 @*/
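/*
   A minimal calling sketch mirroring the proc0 numbers in the example above
   (A is assumed to have been created with type MPIAIJ and 3 local rows):

     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   or, with a single estimate used for every local row,

     ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/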
2416 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2417 {
2418   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2419 
2420   PetscFunctionBegin;
2421   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2422   if (f) {
2423     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2424   }
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatCreateMPIAIJ"
2430 /*@C
2431    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2432    (the default parallel PETSc format).  For good matrix assembly performance
2433    the user should preallocate the matrix storage by setting the parameters
2434    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2435    performance can be increased by more than a factor of 50.
2436 
2437    Collective on MPI_Comm
2438 
2439    Input Parameters:
2440 +  comm - MPI communicator
2441 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2442            This value should be the same as the local size used in creating the
2443            y vector for the matrix-vector product y = Ax.
2444 .  n - This value should be the same as the local size used in creating the
2445        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2446        calculated if N is given) For square matrices n is almost always m.
2447 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2448 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2449 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2450            (same value is used for all local rows)
2451 .  d_nnz - array containing the number of nonzeros in the various rows of the
2452            DIAGONAL portion of the local submatrix (possibly different for each row)
2453            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2454            The size of this array is equal to the number of local rows, i.e 'm'.
2455            You must leave room for the diagonal entry even if it is zero.
2456 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2457            submatrix (same value is used for all local rows).
2458 -  o_nnz - array containing the number of nonzeros in the various rows of the
2459            OFF-DIAGONAL portion of the local submatrix (possibly different for
2460            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2461            structure. The size of this array is equal to the number
2462            of local rows, i.e 'm'.
2463 
2464    Output Parameter:
2465 .  A - the matrix
2466 
2467    Notes:
2468    If the *_nnz parameter is given then the *_nz parameter is ignored
2469 
2470    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2471    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2472    storage requirements for this matrix.
2473 
2474    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2475    processor then it must be used on all processors that share the object for
2476    that argument.
2477 
2478    The user MUST specify either the local or global matrix dimensions
2479    (possibly both).
2480 
2481    The parallel matrix is partitioned such that the first m0 rows belong to
2482    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2483    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
2484 
2485    The DIAGONAL portion of the local submatrix of a processor can be defined
2486    as the submatrix which is obtained by extracting the part corresponding
2487    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2488    first row that belongs to the processor, and r2 is the last row belonging
2489    to this processor. This is a square mxm matrix. The remaining portion
2490    of the local submatrix (which is mxN) constitutes the OFF-DIAGONAL portion.
2491 
2492    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2493 
2494    When calling this routine with a single process communicator, a matrix of
2495    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2496    type of communicator, use the construction mechanism:
2497      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2498 
2499    By default, this format uses inodes (identical nodes) when possible.
2500    We search for consecutive rows with the same nonzero structure, thereby
2501    reusing matrix information to achieve increased efficiency.
2502 
2503    Options Database Keys:
2504 +  -mat_aij_no_inode  - Do not use inodes
2505 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2506 -  -mat_aij_oneindex - Internally use indexing starting at 1
2507         rather than 0.  Note that when calling MatSetValues(),
2508         the user still MUST index entries starting at 0!
2509 
2510 
2511    Example usage:
2512 
2513    Consider the following 8x8 matrix with 34 non-zero values, that is
2514    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2515    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2516    as follows:
2517 
2518 .vb
2519             1  2  0  |  0  3  0  |  0  4
2520     Proc0   0  5  6  |  7  0  0  |  8  0
2521             9  0 10  | 11  0  0  | 12  0
2522     -------------------------------------
2523            13  0 14  | 15 16 17  |  0  0
2524     Proc1   0 18  0  | 19 20 21  |  0  0
2525             0  0  0  | 22 23  0  | 24  0
2526     -------------------------------------
2527     Proc2  25 26 27  |  0  0 28  | 29  0
2528            30  0  0  | 31 32 33  |  0 34
2529 .ve
2530 
2531    This can be represented as a collection of submatrices as:
2532 
2533 .vb
2534       A B C
2535       D E F
2536       G H I
2537 .ve
2538 
2539    Where the submatrices A,B,C are owned by proc0, D,E,F are
2540    owned by proc1, G,H,I are owned by proc2.
2541 
2542    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2543    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2544    The 'M','N' parameters are 8,8, and have the same values on all procs.
2545 
2546    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2547    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2548    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2549    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2550    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2551    matrix, and [DF] as another SeqAIJ matrix.
2552 
2553    When d_nz, o_nz parameters are specified, d_nz storage elements are
2554    allocated for every row of the local diagonal submatrix, and o_nz
2555    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2556    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
2557    row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2558    In this case, the values of d_nz,o_nz are:
2559 .vb
2560      proc0 : d_nz = 2, o_nz = 2
2561      proc1 : d_nz = 3, o_nz = 2
2562      proc2 : d_nz = 1, o_nz = 4
2563 .ve
2564    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2565    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2566    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2567    34 values.
2568 
2569    When d_nnz, o_nnz parameters are specified, the storage is specified
2570    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2571    In the above case the values for d_nnz,o_nnz are:
2572 .vb
2573      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2574      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2575      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2576 .ve
2577    Here the space allocated is the sum of all the above values, i.e. 34, and
2578    hence pre-allocation is perfect.
2579 
2580    Level: intermediate
2581 
2582 .keywords: matrix, aij, compressed row, sparse, parallel
2583 
2584 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2585           MPIAIJ
2586 @*/
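/*
   A minimal calling sketch for the 8x8 example above, run on 3 processes
   (m is this process's number of local rows, 3, 3, or 2; the constant
   estimates d_nz = 3 and o_nz = 2 are illustrative, not exact):

     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,m,PETSC_DECIDE,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ... fill with MatSetValues() and assemble with MatAssemblyBegin()/MatAssemblyEnd() ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/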
2587 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2588 {
2589   PetscErrorCode ierr;
2590   PetscMPIInt    size;
2591 
2592   PetscFunctionBegin;
2593   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2594   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2595   if (size > 1) {
2596     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2597     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2598   } else {
2599     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2600     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2601   }
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 #undef __FUNCT__
2606 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2607 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2608 {
2609   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2610 
2611   PetscFunctionBegin;
2612   *Ad     = a->A;
2613   *Ao     = a->B;
2614   *colmap = a->garray;
2615   PetscFunctionReturn(0);
2616 }
2617 
2618 #undef __FUNCT__
2619 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2620 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2621 {
2622   PetscErrorCode ierr;
2623   PetscInt       i;
2624   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2625 
2626   PetscFunctionBegin;
2627   if (coloring->ctype == IS_COLORING_LOCAL) {
2628     ISColoringValue *allcolors,*colors;
2629     ISColoring      ocoloring;
2630 
2631     /* set coloring for diagonal portion */
2632     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2633 
2634     /* set coloring for off-diagonal portion */
2635     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2636     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2637     for (i=0; i<a->B->n; i++) {
2638       colors[i] = allcolors[a->garray[i]];
2639     }
2640     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2641     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2642     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2643     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2644   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2645     ISColoringValue *colors;
2646     PetscInt        *larray;
2647     ISColoring      ocoloring;
2648 
2649     /* set coloring for diagonal portion */
2650     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2651     for (i=0; i<a->A->n; i++) {
2652       larray[i] = i + a->cstart;
2653     }
2654     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2655     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2656     for (i=0; i<a->A->n; i++) {
2657       colors[i] = coloring->colors[larray[i]];
2658     }
2659     ierr = PetscFree(larray);CHKERRQ(ierr);
2660     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2661     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2662     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2663 
2664     /* set coloring for off-diagonal portion */
2665     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2666     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2667     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2668     for (i=0; i<a->B->n; i++) {
2669       colors[i] = coloring->colors[larray[i]];
2670     }
2671     ierr = PetscFree(larray);CHKERRQ(ierr);
2672     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2673     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2674     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2675   } else {
2676     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2677   }
2678 
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 #if defined(PETSC_HAVE_ADIC)
2683 #undef __FUNCT__
2684 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2685 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2686 {
2687   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2688   PetscErrorCode ierr;
2689 
2690   PetscFunctionBegin;
2691   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2692   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2693   PetscFunctionReturn(0);
2694 }
2695 #endif
2696 
2697 #undef __FUNCT__
2698 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2699 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2700 {
2701   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2702   PetscErrorCode ierr;
2703 
2704   PetscFunctionBegin;
2705   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2706   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2707   PetscFunctionReturn(0);
2708 }
2709 
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatMerge"
2712 /*@C
2713       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2714                  matrices from each processor
2715 
2716     Collective on MPI_Comm
2717 
2718    Input Parameters:
2719 +    comm - the communicator the parallel matrix will live on
2720 .    inmat - the input sequential matrix (each process contributes its own)
2721 .    n - number of local columns (or PETSC_DECIDE)
2722 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2723 
2724    Output Parameter:
2725 .    outmat - the parallel matrix generated
2726 
2727     Level: advanced
2728 
2729    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2730 
2731 @*/
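/*
   A minimal calling sketch (each process passes its own sequential matrix,
   here called inmat; all of them must have the same number of columns):

     Mat outmat;
     ierr = MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);

   Note that the input matrix is destroyed inside MatMerge() (see the
   MatDestroy(inmat) call in the routine below).
*/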
2732 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2733 {
2734   PetscErrorCode ierr;
2735   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2736   PetscInt       *indx;
2737   PetscScalar    *values;
2738   PetscMap       columnmap,rowmap;
2739 
2740   PetscFunctionBegin;
2741   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2742   /*
2743   PetscMPIInt       rank;
2744   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2745   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2746   */
2747   if (scall == MAT_INITIAL_MATRIX){
2748     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2749     if (n == PETSC_DECIDE){
2750       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2751       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2752       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2753       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2754       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2755     }
2756 
2757     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2758     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2759     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2760     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2761     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2762 
2763     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2764     for (i=0;i<m;i++) {
2765       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2766       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2767       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2768     }
2769     /* This routine will ONLY return MPIAIJ type matrix */
2770     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2771     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2772     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2773     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2774 
2775   } else if (scall == MAT_REUSE_MATRIX){
2776     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2777   } else {
2778     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2779   }
2780 
2781   for (i=0;i<m;i++) {
2782     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2783     I    = i + rstart;
2784     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2785     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2786   }
2787   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2788   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2789   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2790 
2791   PetscFunctionReturn(0);
2792 }
2793 
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatFileSplit"
2796 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2797 {
2798   PetscErrorCode    ierr;
2799   PetscMPIInt       rank;
2800   PetscInt          m,N,i,rstart,nnz;
2801   size_t            len;
2802   const PetscInt    *indx;
2803   PetscViewer       out;
2804   char              *name;
2805   Mat               B;
2806   const PetscScalar *values;
2807 
2808   PetscFunctionBegin;
2809   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2810   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2811   /* Should this be the type of the diagonal block of A? */
2812   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2813   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2814   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2815   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2816   for (i=0;i<m;i++) {
2817     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2818     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2819     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2820   }
2821   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2822   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2823 
2824   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2825   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2826   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2827   sprintf(name,"%s.%d",outfile,rank);
2828   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2829   ierr = PetscFree(name);CHKERRQ(ierr);
2830   ierr = MatView(B,out);CHKERRQ(ierr);
2831   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2832   ierr = MatDestroy(B);CHKERRQ(ierr);
2833   PetscFunctionReturn(0);
2834 }
2835 
2836 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2837 #undef __FUNCT__
2838 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2839 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2840 {
2841   PetscErrorCode       ierr;
2842   Mat_Merge_SeqsToMPI  *merge;
2843   PetscObjectContainer container;
2844 
2845   PetscFunctionBegin;
2846   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2847   if (container) {
2848     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2849     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2850     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2851     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2852     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2853     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2854     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2855     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2856     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2857     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2858     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2859     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2860 
2861     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2862     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2863     ierr = PetscFree(merge);CHKERRQ(ierr);
2864   }
2865 
2866   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2867   PetscFunctionReturn(0);
2868 }
2869 
2870 #include "src/mat/utils/freespace.h"
2871 #include "petscbt.h"
2872 #undef __FUNCT__
2873 #define __FUNCT__ "MatMerge_SeqsToMPI"
2874 /*@C
2875       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding the sequential
2876                  matrices from each processor
2877 
2878     Collective on MPI_Comm
2879 
2880    Input Parameters:
2881 +    comm - the communicator the parallel matrix will live on
2882 .    seqmat - the input sequential matrix (one per process)
2883 .    m - number of local rows (or PETSC_DECIDE)
2884 .    n - number of local columns (or PETSC_DECIDE)
2885 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2886 
2887    Output Parameter:
2888 .    mpimat - the parallel matrix generated
2889 
2890     Level: advanced
2891 
2892    Notes:
2893      The dimensions of the sequential matrix in each processor MUST be the same.
2894      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI" and is
2895      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2896 @*/
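/*
   Hypothetical usage sketch (B_seq and B_mpi are illustrative names); the first call builds the
   symbolic structure with MAT_INITIAL_MATRIX, later calls with the same nonzero pattern reuse it:

      Mat B_seq,B_mpi;
      ... each process assembles B_seq with identical global dimensions ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,B_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&B_mpi);CHKERRQ(ierr);
      ... change the numerical values of B_seq but keep its nonzero pattern ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,B_seq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&B_mpi);CHKERRQ(ierr);
*/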
2897 static PetscEvent logkey_seqstompinum = 0;
2898 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2899 {
2900   PetscErrorCode       ierr;
2901   MPI_Comm             comm=mpimat->comm;
2902   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2903   PetscMPIInt          size,rank,taga,*len_s;
2904   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2905   PetscInt             proc,m;
2906   PetscInt             **buf_ri,**buf_rj;
2907   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2908   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2909   MPI_Request          *s_waits,*r_waits;
2910   MPI_Status           *status;
2911   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2912   Mat_Merge_SeqsToMPI  *merge;
2913   PetscObjectContainer container;
2914 
2915   PetscFunctionBegin;
2916   if (!logkey_seqstompinum) {
2917     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2918   }
2919   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2920 
2921   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2922   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2923 
2924   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2925   if (container) {
2926     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2927   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must have been created by MatMerge_SeqsToMPISymbolic()");
2928   bi     = merge->bi;
2929   bj     = merge->bj;
2930   buf_ri = merge->buf_ri;
2931   buf_rj = merge->buf_rj;
2932 
2933   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2934   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2935   len_s  = merge->len_s;
2936 
2937   /* send and recv matrix values */
2938   /*-----------------------------*/
2939   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2940   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2941 
2942   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2943   for (proc=0,k=0; proc<size; proc++){
2944     if (!len_s[proc]) continue;
2945     i = owners[proc];
2946     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2947     k++;
2948   }
2949 
2950   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2951   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2952   ierr = PetscFree(status);CHKERRQ(ierr);
2953 
2954   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2955   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2956 
2957   /* insert mat values of mpimat */
2958   /*----------------------------*/
2959   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2960   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2961   nextrow = buf_ri_k + merge->nrecv;
2962   nextai  = nextrow + merge->nrecv;
2963 
2964   for (k=0; k<merge->nrecv; k++){
2965     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2966     nrows = *(buf_ri_k[k]);
2967     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2968     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
2969   }
2970 
2971   /* set values of ba */
2972   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2973   for (i=0; i<m; i++) {
2974     arow = owners[rank] + i;
2975     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2976     bnzi = bi[i+1] - bi[i];
2977     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2978 
2979     /* add local non-zero vals of this proc's seqmat into ba */
2980     anzi = ai[arow+1] - ai[arow];
2981     aj   = a->j + ai[arow];
2982     aa   = a->a + ai[arow];
2983     nextaj = 0;
2984     for (j=0; nextaj<anzi; j++){
2985       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2986         ba_i[j] += aa[nextaj++];
2987       }
2988     }
2989 
2990     /* add received vals into ba */
2991     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2992       /* i-th row */
2993       if (i == *nextrow[k]) {
2994         anzi = *(nextai[k]+1) - *nextai[k];
2995         aj   = buf_rj[k] + *(nextai[k]);
2996         aa   = abuf_r[k] + *(nextai[k]);
2997         nextaj = 0;
2998         for (j=0; nextaj<anzi; j++){
2999           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3000             ba_i[j] += aa[nextaj++];
3001           }
3002         }
3003         nextrow[k]++; nextai[k]++;
3004       }
3005     }
3006     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3007   }
3008   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3009   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3010 
3011   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3012   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3013   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3014   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3015   PetscFunctionReturn(0);
3016 }
3017 static PetscEvent logkey_seqstompisym = 0;
3018 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3019 {
3020   PetscErrorCode       ierr;
3021   Mat                  B_mpi;
3022   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3023   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3024   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3025   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3026   PetscInt             len,proc,*dnz,*onz;
3027   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3028   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3029   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3030   MPI_Status           *status;
3031   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3032   PetscBT              lnkbt;
3033   Mat_Merge_SeqsToMPI  *merge;
3034   PetscObjectContainer container;
3035 
3036   PetscFunctionBegin;
3037   if (!logkey_seqstompisym) {
3038     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3039   }
3040   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3041 
3042   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3043   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3044 
3045   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3046   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3047 
3048   /* determine row ownership */
3049   /*---------------------------------------------------------*/
3050   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3051   if (m == PETSC_DECIDE) {
3052     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3053   } else {
3054     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3055   }
3056   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3057   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3058   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3059 
3060   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3061   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3062 
3063   /* determine the number of messages to send, their lengths */
3064   /*---------------------------------------------------------*/
3065   len_s  = merge->len_s;
3066 
3067   len = 0;  /* length of buf_si[] */
3068   merge->nsend = 0;
3069   for (proc=0; proc<size; proc++){
3070     len_si[proc] = 0;
3071     if (proc == rank){
3072       len_s[proc] = 0;
3073     } else {
3074       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3075       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3076     }
3077     if (len_s[proc]) {
3078       merge->nsend++;
3079       nrows = 0;
3080       for (i=owners[proc]; i<owners[proc+1]; i++){
3081         if (ai[i+1] > ai[i]) nrows++;
3082       }
3083       len_si[proc] = 2*(nrows+1);
3084       len += len_si[proc];
3085     }
3086   }
3087 
3088   /* determine the number and length of messages to receive for ij-structure */
3089   /*-------------------------------------------------------------------------*/
3090   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3091   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3092 
3093   /* post the Irecv of j-structure */
3094   /*-------------------------------*/
3095   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3096   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3097 
3098   /* post the Isend of j-structure */
3099   /*--------------------------------*/
3100   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3101   sj_waits = si_waits + merge->nsend;
3102 
3103   for (proc=0, k=0; proc<size; proc++){
3104     if (!len_s[proc]) continue;
3105     i = owners[proc];
3106     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3107     k++;
3108   }
3109 
3110   /* receives and sends of j-structure are complete */
3111   /*------------------------------------------------*/
3112   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3113   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3114 
3115   /* send and recv i-structure */
3116   /*---------------------------*/
3117   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3118   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3119 
3120   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3121   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3122   for (proc=0,k=0; proc<size; proc++){
3123     if (!len_s[proc]) continue;
3124     /* form outgoing message for i-structure:
3125          buf_si[0]:                 nrows to be sent
3126                [1:nrows]:           row index (local to the receiving process)
3127                [nrows+1:2*nrows+1]: i-structure index
3128     */
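    /* Illustrative example (values chosen only for this comment): if two nonempty rows with
       local indices 0 and 4 and with 3 and 5 nonzeros are sent to [proc], then
       len_si[proc] = 2*(2+1) = 6 and buf_si = {2, 0, 4, 0, 3, 8},
       i.e. nrows, the two row indices, then the i-structure {0,3,8}. */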
3129     /*-------------------------------------------*/
3130     nrows = len_si[proc]/2 - 1;
3131     buf_si_i    = buf_si + nrows+1;
3132     buf_si[0]   = nrows;
3133     buf_si_i[0] = 0;
3134     nrows = 0;
3135     for (i=owners[proc]; i<owners[proc+1]; i++){
3136       anzi = ai[i+1] - ai[i];
3137       if (anzi) {
3138         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3139         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3140         nrows++;
3141       }
3142     }
3143     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3144     k++;
3145     buf_si += len_si[proc];
3146   }
3147 
3148   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3149   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3150 
3151   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3152   for (i=0; i<merge->nrecv; i++){
3153     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3154   }
3155 
3156   ierr = PetscFree(len_si);CHKERRQ(ierr);
3157   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3158   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3159   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3160   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3161   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3162   ierr = PetscFree(status);CHKERRQ(ierr);
3163 
3164   /* compute a local seq matrix in each processor */
3165   /*----------------------------------------------*/
3166   /* allocate bi array and free space for accumulating nonzero column info */
3167   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3168   bi[0] = 0;
3169 
3170   /* create and initialize a linked list */
3171   nlnk = N+1;
3172   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3173 
3174   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3175   len = 0;
3176   len  = ai[owners[rank+1]] - ai[owners[rank]];
3177   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3178   current_space = free_space;
3179 
3180   /* determine symbolic info for each local row */
3181   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3182   nextrow = buf_ri_k + merge->nrecv;
3183   nextai  = nextrow + merge->nrecv;
3184   for (k=0; k<merge->nrecv; k++){
3185     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3186     nrows = *buf_ri_k[k];
3187     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3188     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
3189   }
3190 
3191   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3192   len = 0;
3193   for (i=0;i<m;i++) {
3194     bnzi   = 0;
3195     /* add local non-zero cols of this proc's seqmat into lnk */
3196     arow   = owners[rank] + i;
3197     anzi   = ai[arow+1] - ai[arow];
3198     aj     = a->j + ai[arow];
3199     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3200     bnzi += nlnk;
3201     /* add received col data into lnk */
3202     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3203       if (i == *nextrow[k]) { /* i-th row */
3204         anzi = *(nextai[k]+1) - *nextai[k];
3205         aj   = buf_rj[k] + *nextai[k];
3206         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3207         bnzi += nlnk;
3208         nextrow[k]++; nextai[k]++;
3209       }
3210     }
3211     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3212 
3213     /* if free space is not available, make more free space */
3214     if (current_space->local_remaining<bnzi) {
3215       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3216       nspacedouble++;
3217     }
3218     /* copy data into free space, then initialize lnk */
3219     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3220     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3221 
3222     current_space->array           += bnzi;
3223     current_space->local_used      += bnzi;
3224     current_space->local_remaining -= bnzi;
3225 
3226     bi[i+1] = bi[i] + bnzi;
3227   }
3228 
3229   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3230 
3231   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3232   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3233   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3234 
3235   /* create symbolic parallel matrix B_mpi */
3236   /*---------------------------------------*/
3237   if (n==PETSC_DECIDE) {
3238     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3239   } else {
3240     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3241   }
3242   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3243   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3244   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3245 
3246   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3247   B_mpi->assembled     = PETSC_FALSE;
3248   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3249   merge->bi            = bi;
3250   merge->bj            = bj;
3251   merge->buf_ri        = buf_ri;
3252   merge->buf_rj        = buf_rj;
3253   merge->coi           = PETSC_NULL;
3254   merge->coj           = PETSC_NULL;
3255   merge->owners_co     = PETSC_NULL;
3256 
3257   /* attach the supporting struct to B_mpi for reuse */
3258   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3259   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3260   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3261   *mpimat = B_mpi;
3262   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3263   PetscFunctionReturn(0);
3264 }
3265 
3266 static PetscEvent logkey_seqstompi = 0;
3267 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3268 {
3269   PetscErrorCode   ierr;
3270 
3271   PetscFunctionBegin;
3272   if (!logkey_seqstompi) {
3273     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3274   }
3275   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3276   if (scall == MAT_INITIAL_MATRIX){
3277     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3278   }
3279   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3280   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3281   PetscFunctionReturn(0);
3282 }
3283 static PetscEvent logkey_getlocalmat = 0;
3284 #undef __FUNCT__
3285 #define __FUNCT__ "MatGetLocalMat"
3286 /*@C
3287      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A (diagonal and off-diagonal parts merged)
3288 
3289     Not Collective
3290 
3291    Input Parameters:
3292 +    A - the matrix
3293 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3294 
3295    Output Parameter:
3296 .    A_loc - the local sequential matrix generated
3297 
3298     Level: developer
3299 
3300 @*/
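/*
   Hypothetical usage sketch (A_loc is an illustrative name):

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... use A_loc as an ordinary MATSEQAIJ matrix with A->m rows and A->N columns ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ... the second call refreshes the values of A_loc after A changed ...
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/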
3301 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3302 {
3303   PetscErrorCode  ierr;
3304   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3305   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3306   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3307   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3308   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3309   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3310 
3311   PetscFunctionBegin;
3312   if (!logkey_getlocalmat) {
3313     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3314   }
3315   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3316   if (scall == MAT_INITIAL_MATRIX){
3317     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3318     ci[0] = 0;
3319     for (i=0; i<am; i++){
3320       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3321     }
3322     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3323     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3324     k = 0;
3325     for (i=0; i<am; i++) {
3326       ncols_o = bi[i+1] - bi[i];
3327       ncols_d = ai[i+1] - ai[i];
3328       /* off-diagonal portion of A */
3329       for (jo=0; jo<ncols_o; jo++) {
3330         col = cmap[*bj];
3331         if (col >= cstart) break;
3332         cj[k]   = col; bj++;
3333         ca[k++] = *ba++;
3334       }
3335       /* diagonal portion of A */
3336       for (j=0; j<ncols_d; j++) {
3337         cj[k]   = cstart + *aj++;
3338         ca[k++] = *aa++;
3339       }
3340       /* off-diagonal portion of A */
3341       for (j=jo; j<ncols_o; j++) {
3342         cj[k]   = cmap[*bj++];
3343         ca[k++] = *ba++;
3344       }
3345     }
3346     /* put together the new matrix */
3347     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3348     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3349     /* Since these are PETSc arrays, change flags to free them as necessary. */
3350     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3351     mat->freedata = PETSC_TRUE;
3352     mat->nonew    = 0;
3353   } else if (scall == MAT_REUSE_MATRIX){
3354     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3355     ci = mat->i; cj = mat->j; ca = mat->a;
3356     for (i=0; i<am; i++) {
3357       /* off-diagonal portion of A */
3358       ncols_o = bi[i+1] - bi[i];
3359       for (jo=0; jo<ncols_o; jo++) {
3360         col = cmap[*bj];
3361         if (col >= cstart) break;
3362         *ca++ = *ba++; bj++;
3363       }
3364       /* diagonal portion of A */
3365       ncols_d = ai[i+1] - ai[i];
3366       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3367       /* off-diagonal portion of A */
3368       for (j=jo; j<ncols_o; j++) {
3369         *ca++ = *ba++; bj++;
3370       }
3371     }
3372   } else {
3373     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3374   }
3375 
3376   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3377   PetscFunctionReturn(0);
3378 }
3379 
3380 static PetscEvent logkey_getlocalmatcondensed = 0;
3381 #undef __FUNCT__
3382 #define __FUNCT__ "MatGetLocalMatCondensed"
3383 /*@C
3384      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A and its NON-ZERO columns
3385 
3386     Not Collective
3387 
3388    Input Parameters:
3389 +    A - the matrix
3390 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3391 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3392 
3393    Output Parameter:
3394 .    A_loc - the local sequential matrix generated
3395 
3396     Level: developer
3397 
3398 @*/
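/*
   Hypothetical usage sketch; passing PETSC_NULL for row and col lets the routine pick all
   local rows and the condensed (nonzero) columns itself (A_loc is an illustrative name):

      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ... use A_loc ...
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/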
3399 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3400 {
3401   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3402   PetscErrorCode    ierr;
3403   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3404   IS                isrowa,iscola;
3405   Mat               *aloc;
3406 
3407   PetscFunctionBegin;
3408   if (!logkey_getlocalmatcondensed) {
3409     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3410   }
3411   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3412   if (!row){
3413     start = a->rstart; end = a->rend;
3414     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3415   } else {
3416     isrowa = *row;
3417   }
3418   if (!col){
3419     start = a->cstart;
3420     cmap  = a->garray;
3421     nzA   = a->A->n;
3422     nzB   = a->B->n;
3423     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3424     ncols = 0;
3425     for (i=0; i<nzB; i++) {
3426       if (cmap[i] < start) idx[ncols++] = cmap[i];
3427       else break;
3428     }
3429     imark = i;
3430     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3431     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3432     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3433     ierr = PetscFree(idx);CHKERRQ(ierr);
3434   } else {
3435     iscola = *col;
3436   }
3437   if (scall != MAT_INITIAL_MATRIX){
3438     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3439     aloc[0] = *A_loc;
3440   }
3441   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3442   *A_loc = aloc[0];
3443   ierr = PetscFree(aloc);CHKERRQ(ierr);
3444   if (!row){
3445     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3446   }
3447   if (!col){
3448     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3449   }
3450   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3451   PetscFunctionReturn(0);
3452 }
3453 
3454 static PetscEvent logkey_GetBrowsOfAcols = 0;
3455 #undef __FUNCT__
3456 #define __FUNCT__ "MatGetBrowsOfAcols"
3457 /*@C
3458     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3459 
3460     Collective on Mat
3461 
3462    Input Parameters:
3463 +    A,B - the matrices in mpiaij format
3464 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3465 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3466 
3467    Output Parameter:
3468 +    rowb, colb - index sets of rows and columns of B to extract
3469 .    brstart - row index of B_seq at which B's own local rows begin (the next B->m rows of B_seq come from B's local rows)
3470 -    B_seq - the sequential matrix generated
3471 
3472     Level: developer
3473 
3474 @*/
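/*
   Hypothetical usage sketch (rowb, colb, brstart and B_seq are illustrative names); the index
   sets returned by the first call must be passed back in when the structure is reused:

      IS       rowb,colb;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ... later, after the values of B change but not its nonzero structure ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/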
3475 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3476 {
3477   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3478   PetscErrorCode    ierr;
3479   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3480   IS                isrowb,iscolb;
3481   Mat               *bseq;
3482 
3483   PetscFunctionBegin;
3484   if (a->cstart != b->rstart || a->cend != b->rend){
3485     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3486   }
3487   if (!logkey_GetBrowsOfAcols) {
3488     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3489   }
3490   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3491 
3492   if (scall == MAT_INITIAL_MATRIX){
3493     start = a->cstart;
3494     cmap  = a->garray;
3495     nzA   = a->A->n;
3496     nzB   = a->B->n;
3497     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3498     ncols = 0;
3499     for (i=0; i<nzB; i++) {  /* row < local row index */
3500       if (cmap[i] < start) idx[ncols++] = cmap[i];
3501       else break;
3502     }
3503     imark = i;
3504     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3505     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3506     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3507     ierr = PetscFree(idx);CHKERRQ(ierr);
3508     *brstart = imark;
3509     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3510   } else {
3511     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3512     isrowb = *rowb; iscolb = *colb;
3513     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3514     bseq[0] = *B_seq;
3515   }
3516   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3517   *B_seq = bseq[0];
3518   ierr = PetscFree(bseq);CHKERRQ(ierr);
3519   if (!rowb){
3520     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3521   } else {
3522     *rowb = isrowb;
3523   }
3524   if (!colb){
3525     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3526   } else {
3527     *colb = iscolb;
3528   }
3529   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3530   PetscFunctionReturn(0);
3531 }
3532 
3533 static PetscEvent logkey_GetBrowsOfAocols = 0;
3534 #undef __FUNCT__
3535 #define __FUNCT__ "MatGetBrowsOfAoCols"
3536 /*@C
3537     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3538     of the OFF-DIAGONAL portion of local A
3539 
3540     Collective on Mat
3541 
3542    Input Parameters:
3543 +    A,B - the matrices in mpiaij format
3544 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3545 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3546 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3547 
3548    Output Parameter:
3549 .    B_oth - the sequential matrix generated
3550 
3551     Level: developer
3552 
3553 @*/
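/*
   Hypothetical usage sketch (startsj, bufa and B_oth are illustrative names); the arrays
   returned on the first call are kept by the caller and passed back for MAT_REUSE_MATRIX:

      PetscInt    *startsj = PETSC_NULL;
      PetscScalar *bufa    = PETSC_NULL;
      Mat         B_oth;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ... later, with new values in B but the same nonzero structure ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
*/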
3554 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3555 {
3556   VecScatter_MPI_General *gen_to,*gen_from;
3557   PetscErrorCode         ierr;
3558   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3559   Mat_SeqAIJ             *b_oth;
3560   VecScatter             ctx=a->Mvctx;
3561   MPI_Comm               comm=ctx->comm;
3562   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3563   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3564   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3565   PetscInt               i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3566   MPI_Request            *rwaits,*swaits;
3567   MPI_Status             *sstatus,rstatus;
3568   PetscInt               *cols;
3569   PetscScalar            *vals;
3570   PetscMPIInt            j;
3571 
3572   PetscFunctionBegin;
3573   if (a->cstart != b->rstart || a->cend != b->rend){
3574     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3575   }
3576   if (!logkey_GetBrowsOfAocols) {
3577     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3578   }
3579   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3580   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3581 
3582   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3583   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3584   rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
3585   svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
3586   nrecvs   = gen_from->n;
3587   nsends   = gen_to->n;
3588   rwaits   = gen_from->requests;
3589   swaits   = gen_to->requests;
3590   rrow     = gen_from->indices; /* local row index to be received */
3591   srow     = gen_to->indices;   /* local row index to be sent */
3592   rstarts  = gen_from->starts;
3593   sstarts  = gen_to->starts;
3594   rprocs   = gen_from->procs;
3595   sprocs   = gen_to->procs;
3596   sstatus  = gen_to->sstatus;
3597 
3598   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3599   if (scall == MAT_INITIAL_MATRIX){
3600     /* i-array */
3601     /*---------*/
3602     /*  post receives */
3603     for (i=0; i<nrecvs; i++){
3604       rowlen = (PetscInt*)rvalues + rstarts[i];
3605       nrows = rstarts[i+1]-rstarts[i];
3606       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3607     }
3608 
3609     /* pack the outgoing message */
3610     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3611     rstartsj = sstartsj + nsends +1;
3612     sstartsj[0] = 0;  rstartsj[0] = 0;
3613     len = 0; /* total length of j or a array to be sent */
3614     k = 0;
3615     for (i=0; i<nsends; i++){
3616       rowlen = (PetscInt*)svalues + sstarts[i];
3617       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3618       for (j=0; j<nrows; j++) {
3619         row = srow[k] + b->rowners[rank]; /* global row idx */
3620         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3621         len += rowlen[j];
3622         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3623         k++;
3624       }
3625       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3626       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3627     }
3628     /* recvs and sends of i-array are completed */
3629     i = nrecvs;
3630     while (i--) {
3631       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3632     }
3633     if (nsends) {
3634       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3635     }
3636     /* allocate buffers for sending j and a arrays */
3637     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3638     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3639 
3640     /* create i-array of B_oth */
3641     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3642     b_othi[0] = 0;
3643     len = 0; /* total length of j or a array to be received */
3644     k = 0;
3645     for (i=0; i<nrecvs; i++){
3646       rowlen = (PetscInt*)rvalues + rstarts[i];
3647       nrows = rstarts[i+1]-rstarts[i];
3648       for (j=0; j<nrows; j++) {
3649         b_othi[k+1] = b_othi[k] + rowlen[j];
3650         len += rowlen[j]; k++;
3651       }
3652       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3653     }
3654 
3655     /* allocate space for the j and a arrays of B_oth */
3656     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3657     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3658 
3659     /* j-array */
3660     /*---------*/
3661     /*  post receives of j-array */
3662     for (i=0; i<nrecvs; i++){
3663       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3664       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3665     }
3666     k = 0;
3667     for (i=0; i<nsends; i++){
3668       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3669       bufJ = bufj+sstartsj[i];
3670       for (j=0; j<nrows; j++) {
3671         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3672         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3673         for (l=0; l<ncols; l++){
3674           *bufJ++ = cols[l];
3675         }
3676         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3677       }
3678       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3679     }
3680 
3681     /* recvs and sends of j-array are completed */
3682     i = nrecvs;
3683     while (i--) {
3684       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3685     }
3686     if (nsends) {
3687       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3688     }
3689   } else if (scall == MAT_REUSE_MATRIX){
3690     sstartsj = *startsj;
3691     rstartsj = sstartsj + nsends +1;
3692     bufa     = *bufa_ptr;
3693     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3694     b_otha   = b_oth->a;
3695   } else {
3696     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3697   }
3698 
3699   /* a-array */
3700   /*---------*/
3701   /*  post receives of a-array */
3702   for (i=0; i<nrecvs; i++){
3703     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3704     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3705   }
3706   k = 0;
3707   for (i=0; i<nsends; i++){
3708     nrows = sstarts[i+1]-sstarts[i];
3709     bufA = bufa+sstartsj[i];
3710     for (j=0; j<nrows; j++) {
3711       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3712       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3713       for (l=0; l<ncols; l++){
3714         *bufA++ = vals[l];
3715       }
3716       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3717 
3718     }
3719     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3720   }
3721   /* recvs and sends of a-array are completed */
3722   i = nrecvs;
3723   while (i--) {
3724     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3725   }
3726   if (nsends) {
3727     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3728   }
3729 
3730   if (scall == MAT_INITIAL_MATRIX){
3731     /* put together the new matrix */
3732     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3733 
3734     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3735     /* Since these are PETSc arrays, change flags to free them as necessary. */
3736     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3737     b_oth->freedata = PETSC_TRUE;
3738     b_oth->nonew    = 0;
3739 
3740     ierr = PetscFree(bufj);CHKERRQ(ierr);
3741     if (!startsj || !bufa_ptr){
3742       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3743       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3744     } else {
3745       *startsj  = sstartsj;
3746       *bufa_ptr = bufa;
3747     }
3748   }
3749   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3750 
3751   PetscFunctionReturn(0);
3752 }
3753 
3754 /*MC
3755    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3756 
3757    Options Database Keys:
3758 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3759 
3760   Level: beginner
3761 
3762 .seealso: MatCreateMPIAIJ
3763 M*/
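/*
   Minimal creation sketch for this type, assuming the MatCreate() calling sequence used elsewhere
   in this file; comm, the local sizes m,n, the global sizes M,N, and the preallocation counts
   (5 diagonal, 2 off-diagonal nonzeros per row) are illustrative:

      Mat A;
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...
*/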
3764 
3765 EXTERN_C_BEGIN
3766 #undef __FUNCT__
3767 #define __FUNCT__ "MatCreate_MPIAIJ"
3768 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3769 {
3770   Mat_MPIAIJ     *b;
3771   PetscErrorCode ierr;
3772   PetscInt       i;
3773   PetscMPIInt    size;
3774 
3775   PetscFunctionBegin;
3776   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3777 
3778   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3779   B->data         = (void*)b;
3780   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3781   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3782   B->factor       = 0;
3783   B->bs           = 1;
3784   B->assembled    = PETSC_FALSE;
3785   B->mapping      = 0;
3786 
3787   B->insertmode      = NOT_SET_VALUES;
3788   b->size            = size;
3789   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3790 
3791   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3792   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3793 
3794   /* the information in the maps duplicates the information computed below, eventually
3795      we should remove the duplicate information that is not contained in the maps */
3796   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3797   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3798 
3799   /* build local table of row and column ownerships */
3800   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3801   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3802   b->cowners = b->rowners + b->size + 2;
3803   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3804   b->rowners[0] = 0;
3805   for (i=2; i<=b->size; i++) {
3806     b->rowners[i] += b->rowners[i-1];
3807   }
3808   b->rstart = b->rowners[b->rank];
3809   b->rend   = b->rowners[b->rank+1];
3810   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3811   b->cowners[0] = 0;
3812   for (i=2; i<=b->size; i++) {
3813     b->cowners[i] += b->cowners[i-1];
3814   }
3815   b->cstart = b->cowners[b->rank];
3816   b->cend   = b->cowners[b->rank+1];
3817 
3818   /* build cache for off array entries formed */
3819   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3820   b->donotstash  = PETSC_FALSE;
3821   b->colmap      = 0;
3822   b->garray      = 0;
3823   b->roworiented = PETSC_TRUE;
3824 
3825   /* stuff used for matrix vector multiply */
3826   b->lvec      = PETSC_NULL;
3827   b->Mvctx     = PETSC_NULL;
3828 
3829   /* stuff for MatGetRow() */
3830   b->rowindices   = 0;
3831   b->rowvalues    = 0;
3832   b->getrowactive = PETSC_FALSE;
3833 
3834   /* Explicitly create 2 MATSEQAIJ matrices. */
3835   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3836   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3837   PetscLogObjectParent(B,b->A);
3838   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3839   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3840   PetscLogObjectParent(B,b->B);
3841 
3842   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3843                                      "MatStoreValues_MPIAIJ",
3844                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3845   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3846                                      "MatRetrieveValues_MPIAIJ",
3847                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3848   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3849 				     "MatGetDiagonalBlock_MPIAIJ",
3850                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3851   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3852 				     "MatIsTranspose_MPIAIJ",
3853 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3854   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3855 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3856 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3857   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3858 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3859 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3860   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3861 				     "MatDiagonalScaleLocal_MPIAIJ",
3862 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3863   PetscFunctionReturn(0);
3864 }
3865 EXTERN_C_END
3866