
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local column number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this mapping is
  scalable, at a slightly higher hash-table lookup cost; without it the
  mapping is not scalable (each processor holds an order-N integer array)
  but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
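
/*
   Illustrative sketch (not part of this file's code paths): how the colmap
   built above is consulted.  Given a global column index gcol that may live
   in the off-diagonal block, the lookup yields the local column index into
   aij->B, or a negative value when gcol has no local entry.  The names gcol
   and lcol are hypothetical.

     PetscInt gcol = ...,lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;    // entries are stored shifted by one so 0 can mean "absent"
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/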

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (PetscInt*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}
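
/*
   Sketch of what the insertion macros above do for one CSR row (explanatory
   only).  After the binary search has located position _i, and assuming the
   row still has free space (so no CHUNKSIZE reallocation is needed), the
   tail of the row is shifted up one slot and the new entry dropped in:

     for (ii=nrow-1; ii>=_i; ii--) { rp[ii+1] = rp[ii]; ap[ii+1] = ap[ii]; }
     rp[_i] = col; ap[_i] = value; nrow++;
*/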

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col) high = t; \
      else             low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (PetscInt*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar    *ba = b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  PetscInt       nonew = a->nonew,shift=0;
  PetscScalar    *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
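
/*
   Usage sketch (illustrative, with hypothetical indices): callers pass GLOBAL
   indices.  Entries whose row is owned locally go directly into the diagonal
   (A) or off-diagonal (B) block above; entries for off-process rows are
   stashed and exchanged during assembly.

     PetscInt    row = 0,col = 0;   // global indices
     PetscScalar val = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/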

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
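
/*
   Usage sketch (illustrative): MatGetValues() on an MPIAIJ matrix can only
   retrieve rows owned by the calling process (see the SETERRQ above); the
   columns may be anywhere, and absent off-diagonal entries come back as 0.0.

     PetscInt    row,col = 0;
     PetscScalar val;
     row  = rstart;    // first locally owned row; name hypothetical
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/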

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
      /* reaccess b because aij->B was changed */
      b    = (Mat_SeqAIJ *)aij->B->data;
    }
  }
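
  /*
     Note on the MPI_PROD reduction above (explanatory): each process
     contributes its was_assembled flag (0 or 1), so the product is 1 only if
     every process was already assembled.  If any process has disassembled it
     contributes 0, the product becomes 0, and all remaining assembled
     processes disassemble too, keeping the parallel data structures consistent.
  */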

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }

  b->inode.use         = PETSC_FALSE;
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory on every call, thus triggering lots
     of mallocs and slowing things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
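
/*
   Usage sketch (illustrative, hypothetical row list): the IS may contain rows
   owned by any process; the sends/receives above route each row to its owner.

     IS          is;
     PetscInt    rows[] = {0,5};
     PetscScalar one    = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);   // pass 0 to leave the diagonal zeroed
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/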

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
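
/*
   Structure of the multiply above, schematically: with A_d the local diagonal
   block, A_o the off-diagonal block, and x_g the ghost values gathered into
   a->lvec,

     y = A_d * x  +  A_o * x_g

   The scatter filling lvec is started first, the local product A_d*x is
   computed while the messages are in flight, then the scatter is completed
   and the off-diagonal contribution is added in.
*/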

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do the non-diagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive the remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which holds for the current
     implementation but may not hold in general */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: the off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do the non-diagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do the local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive the remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which holds for the current
     implementation but may not hold in general */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
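
/*
   Layout of the binary file written above, for reference:

     header[0]  MAT_FILE_COOKIE
     header[1]  M  (global rows)
     header[2]  N  (global columns)
     header[3]  total number of nonzeros
     M    PetscInt     row lengths
     nz   PetscInt     column indices (per row: lower B part, A part, upper B part)
     nz   PetscScalar  values, in the same order as the column indices
*/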

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
               rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
               rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
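
/*
   Per-iteration update performed by the loops above, schematically (A_d the
   local diagonal block, B the off-diagonal block, x_g the ghost values
   gathered into mat->lvec):

     bb1 = bb - B*x_g                  // VecScale(-1,lvec) then B multadd
     relax A_d*x = bb1 locally         // forward, backward, or symmetric sweep

   so the off-process coupling is handled in block-Jacobi fashion while the
   local block is relaxed with SOR.
*/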

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B are already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
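
/*
   Usage sketch (illustrative): only locally owned rows may be requested, and
   each row must be restored before the next one is fetched.

     PetscInt    nz,*cols;
     PetscScalar *vals;
     ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     // ... nz entries: global columns cols[0..nz-1], values vals[0..nz-1] ...
     ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
*/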

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatNorm_MPIAIJ"
1312 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1313 {
1314   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1316   PetscErrorCode ierr;
1317   PetscInt       i,j,cstart = aij->cstart;
1318   PetscReal      sum = 0.0;
1319   PetscScalar    *v;
1320 
1321   PetscFunctionBegin;
1322   if (aij->size == 1) {
1323     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1324   } else {
1325     if (type == NORM_FROBENIUS) {
1326       v = amat->a;
1327       for (i=0; i<amat->nz; i++) {
1328 #if defined(PETSC_USE_COMPLEX)
1329         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1330 #else
1331         sum += (*v)*(*v); v++;
1332 #endif
1333       }
1334       v = bmat->a;
1335       for (i=0; i<bmat->nz; i++) {
1336 #if defined(PETSC_USE_COMPLEX)
1337         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1338 #else
1339         sum += (*v)*(*v); v++;
1340 #endif
1341       }
1342       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1343       *norm = sqrt(*norm);
1344     } else if (type == NORM_1) { /* max column norm */
1345       PetscReal *tmp,*tmp2;
1346       PetscInt    *jj,*garray = aij->garray;
1347       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1348       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1349       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1350       *norm = 0.0;
1351       v = amat->a; jj = amat->j;
1352       for (j=0; j<amat->nz; j++) {
1353         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1354       }
1355       v = bmat->a; jj = bmat->j;
1356       for (j=0; j<bmat->nz; j++) {
1357         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1358       }
1359       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1360       for (j=0; j<mat->N; j++) {
1361         if (tmp2[j] > *norm) *norm = tmp2[j];
1362       }
1363       ierr = PetscFree(tmp);CHKERRQ(ierr);
1364       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1365     } else if (type == NORM_INFINITY) { /* max row norm */
1366       PetscReal ntemp = 0.0;
1367       for (j=0; j<aij->A->m; j++) {
1368         v = amat->a + amat->i[j];
1369         sum = 0.0;
1370         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1371           sum += PetscAbsScalar(*v); v++;
1372         }
1373         v = bmat->a + bmat->i[j];
1374         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1375           sum += PetscAbsScalar(*v); v++;
1376         }
1377         if (sum > ntemp) ntemp = sum;
1378       }
1379       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1380     } else {
1381       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1382     }
1383   }
1384   PetscFunctionReturn(0);
1385 }
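
/*
   Example usage of MatNorm() (a minimal sketch). As implemented above,
   NORM_FROBENIUS returns sqrt(sum_ij |a_ij|^2), NORM_1 the maximum
   absolute column sum, and NORM_INFINITY the maximum absolute row sum;
   the two norm is not supported for this matrix type:

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/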
1386 
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatTranspose_MPIAIJ"
1389 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1390 {
1391   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1392   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1393   PetscErrorCode ierr;
1394   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1395   Mat            B;
1396   PetscScalar    *array;
1397 
1398   PetscFunctionBegin;
1399   if (!matout && M != N) {
1400     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1401   }
1402 
1403   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1404   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1405   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1406 
1407   /* copy over the A part */
1408   Aloc = (Mat_SeqAIJ*)a->A->data;
1409   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1410   row = a->rstart;
1411   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
1412   for (i=0; i<m; i++) {
1413     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1414     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1415   }
1416   aj = Aloc->j;
1417   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}
1418 
1419   /* copy over the B part */
1420   Aloc = (Mat_SeqAIJ*)a->B->data;
1421   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1422   row  = a->rstart;
1423   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1424   ct   = cols;
1425   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1426   for (i=0; i<m; i++) {
1427     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1428     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1429   }
1430   ierr = PetscFree(ct);CHKERRQ(ierr);
1431   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1432   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1433   if (matout) {
1434     *matout = B;
1435   } else {
1436     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1437   }
1438   PetscFunctionReturn(0);
1439 }
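
/*
   Example usage of MatTranspose() (a minimal sketch; assumes this PETSc
   version's convention that passing PETSC_NULL for the output requests the
   in-place transpose, which the code above only permits for square matrices):

     Mat At;
     ierr = MatTranspose(A,&At);CHKERRQ(ierr);         out-of-place
     ierr = MatTranspose(B,PETSC_NULL);CHKERRQ(ierr);  in-place, B square
*/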
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1443 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1444 {
1445   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1446   Mat            a = aij->A,b = aij->B;
1447   PetscErrorCode ierr;
1448   PetscInt       s1,s2,s3;
1449 
1450   PetscFunctionBegin;
1451   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1452   if (rr) {
1453     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1454     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1455     /* Overlap communication with computation. */
1456     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1457   }
1458   if (ll) {
1459     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1460     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1461     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1462   }
1463   /* scale  the diagonal block */
1464   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1465 
1466   if (rr) {
1467     /* Do a scatter end and then right scale the off-diagonal block */
1468     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1469     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1470   }
1471 
1472   PetscFunctionReturn(0);
1473 }
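
/*
   Example usage of MatDiagonalScale() (a minimal sketch): forms
   diag(ll)*A*diag(rr); either vector may be PETSC_NULL to scale on one side
   only. ll must conform to the local row layout and rr to the local column
   layout, as checked above:

     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/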
1474 
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1478 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1479 {
1480   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1481   PetscErrorCode ierr;
1482 
1483   PetscFunctionBegin;
1484   if (!a->rank) {
1485     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1486   }
1487   PetscFunctionReturn(0);
1488 }
1489 
1490 #undef __FUNCT__
1491 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1492 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1493 {
1494   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1495   PetscErrorCode ierr;
1496 
1497   PetscFunctionBegin;
1498   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1499   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1500   PetscFunctionReturn(0);
1501 }
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1504 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1505 {
1506   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1507   PetscErrorCode ierr;
1508 
1509   PetscFunctionBegin;
1510   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatEqual_MPIAIJ"
1516 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1517 {
1518   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1519   Mat            a,b,c,d;
1520   PetscTruth     flg;
1521   PetscErrorCode ierr;
1522 
1523   PetscFunctionBegin;
1524   a = matA->A; b = matA->B;
1525   c = matB->A; d = matB->B;
1526 
1527   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1528   if (flg) {
1529     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1530   }
1531   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 #undef __FUNCT__
1536 #define __FUNCT__ "MatCopy_MPIAIJ"
1537 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1538 {
1539   PetscErrorCode ierr;
1540   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1541   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1542 
1543   PetscFunctionBegin;
1544   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1545   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1546     /* Because of the column compression in the off-processor part of the matrix a->B,
1547        the number of columns in a->B and b->B may differ, hence we cannot call
1548        MatCopy() directly on the two parts. If need be, we can provide a copy more
1549        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1550        then copying the submatrices */
1551     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1552   } else {
1553     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1554     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1555   }
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1561 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1562 {
1563   PetscErrorCode ierr;
1564 
1565   PetscFunctionBegin;
1566   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 #include "petscblaslapack.h"
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatAXPY_MPIAIJ"
1573 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1574 {
1575   PetscErrorCode ierr;
1576   PetscInt       i;
1577   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1578   PetscBLASInt   bnz,one=1;
1579   Mat_SeqAIJ     *x,*y;
1580 
1581   PetscFunctionBegin;
1582   if (str == SAME_NONZERO_PATTERN) {
1583     x = (Mat_SeqAIJ *)xx->A->data;
1584     y = (Mat_SeqAIJ *)yy->A->data;
1585     bnz = (PetscBLASInt)x->nz;
1586     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1587     x = (Mat_SeqAIJ *)xx->B->data;
1588     y = (Mat_SeqAIJ *)yy->B->data;
1589     bnz = (PetscBLASInt)x->nz;
1590     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1591   } else if (str == SUBSET_NONZERO_PATTERN) {
1592     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1593 
1594     x = (Mat_SeqAIJ *)xx->B->data;
1595     y = (Mat_SeqAIJ *)yy->B->data;
1596     if (y->xtoy && y->XtoY != xx->B) {
1597       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1598       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1599     }
1600     if (!y->xtoy) { /* get xtoy */
1601       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1602       y->XtoY = xx->B;
1603     }
1604     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1605   } else {
1606     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1607   }
1608   PetscFunctionReturn(0);
1609 }
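
/*
   Example usage of MatAXPY() with the calling sequence used in this file
   (a minimal sketch): computes Y = Y + a*X. SAME_NONZERO_PATTERN selects
   the fast BLAS path above and is only safe when X and Y really share a
   nonzero structure:

     PetscScalar alpha = 2.0;
     ierr = MatAXPY(&alpha,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/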
1610 
1611 /* -------------------------------------------------------------------*/
1612 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1613        MatGetRow_MPIAIJ,
1614        MatRestoreRow_MPIAIJ,
1615        MatMult_MPIAIJ,
1616 /* 4*/ MatMultAdd_MPIAIJ,
1617        MatMultTranspose_MPIAIJ,
1618        MatMultTransposeAdd_MPIAIJ,
1619        0,
1620        0,
1621        0,
1622 /*10*/ 0,
1623        0,
1624        0,
1625        MatRelax_MPIAIJ,
1626        MatTranspose_MPIAIJ,
1627 /*15*/ MatGetInfo_MPIAIJ,
1628        MatEqual_MPIAIJ,
1629        MatGetDiagonal_MPIAIJ,
1630        MatDiagonalScale_MPIAIJ,
1631        MatNorm_MPIAIJ,
1632 /*20*/ MatAssemblyBegin_MPIAIJ,
1633        MatAssemblyEnd_MPIAIJ,
1634        0,
1635        MatSetOption_MPIAIJ,
1636        MatZeroEntries_MPIAIJ,
1637 /*25*/ MatZeroRows_MPIAIJ,
1638        0,
1639        0,
1640        0,
1641        0,
1642 /*30*/ MatSetUpPreallocation_MPIAIJ,
1643        0,
1644        0,
1645        0,
1646        0,
1647 /*35*/ MatDuplicate_MPIAIJ,
1648        0,
1649        0,
1650        0,
1651        0,
1652 /*40*/ MatAXPY_MPIAIJ,
1653        MatGetSubMatrices_MPIAIJ,
1654        MatIncreaseOverlap_MPIAIJ,
1655        MatGetValues_MPIAIJ,
1656        MatCopy_MPIAIJ,
1657 /*45*/ MatPrintHelp_MPIAIJ,
1658        MatScale_MPIAIJ,
1659        0,
1660        0,
1661        0,
1662 /*50*/ MatSetBlockSize_MPIAIJ,
1663        0,
1664        0,
1665        0,
1666        0,
1667 /*55*/ MatFDColoringCreate_MPIAIJ,
1668        0,
1669        MatSetUnfactored_MPIAIJ,
1670        0,
1671        0,
1672 /*60*/ MatGetSubMatrix_MPIAIJ,
1673        MatDestroy_MPIAIJ,
1674        MatView_MPIAIJ,
1675        MatGetPetscMaps_Petsc,
1676        0,
1677 /*65*/ 0,
1678        0,
1679        0,
1680        0,
1681        0,
1682 /*70*/ 0,
1683        0,
1684        MatSetColoring_MPIAIJ,
1685 #if defined(PETSC_HAVE_ADIC)
1686        MatSetValuesAdic_MPIAIJ,
1687 #else
1688        0,
1689 #endif
1690        MatSetValuesAdifor_MPIAIJ,
1691 /*75*/ 0,
1692        0,
1693        0,
1694        0,
1695        0,
1696 /*80*/ 0,
1697        0,
1698        0,
1699        0,
1700 /*84*/ MatLoad_MPIAIJ,
1701        0,
1702        0,
1703        0,
1704        0,
1705        0,
1706 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1707        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1708        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1709        MatPtAP_MPIAIJ_MPIAIJ,
1710        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1711 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1712        0,
1713        0,
1714        0};
1715 
1716 /* ----------------------------------------------------------------------------------------*/
1717 
1718 EXTERN_C_BEGIN
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1721 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1722 {
1723   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1724   PetscErrorCode ierr;
1725 
1726   PetscFunctionBegin;
1727   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1728   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1729   PetscFunctionReturn(0);
1730 }
1731 EXTERN_C_END
1732 
1733 EXTERN_C_BEGIN
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1736 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1737 {
1738   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1743   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1744   PetscFunctionReturn(0);
1745 }
1746 EXTERN_C_END
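
/*
   Example of the store/retrieve pattern (a minimal sketch, assuming the
   MAT_NO_NEW_NONZERO_LOCATIONS option of this PETSc version): intended for
   matrices that are repeatedly reassembled with an unchanged nonzero
   structure, e.g. a preconditioner matrix rebuilt every Newton step:

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... modify the matrix and use it ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   restores the stored values
*/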
1747 
1748 #include "petscpc.h"
1749 EXTERN_C_BEGIN
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1752 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1753 {
1754   Mat_MPIAIJ     *b;
1755   PetscErrorCode ierr;
1756   PetscInt       i;
1757 
1758   PetscFunctionBegin;
1759   B->preallocated = PETSC_TRUE;
1760   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1761   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1762   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1763   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1764   if (d_nnz) {
1765     for (i=0; i<B->m; i++) {
1766       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1767     }
1768   }
1769   if (o_nnz) {
1770     for (i=0; i<B->m; i++) {
1771       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1772     }
1773   }
1774   b = (Mat_MPIAIJ*)B->data;
1775   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1776   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1777 
1778   PetscFunctionReturn(0);
1779 }
1780 EXTERN_C_END
1781 
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1784 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1785 {
1786   Mat            mat;
1787   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1788   PetscErrorCode ierr;
1789 
1790   PetscFunctionBegin;
1791   *newmat       = 0;
1792   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1793   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1794   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1795   a    = (Mat_MPIAIJ*)mat->data;
1796 
1797   mat->factor       = matin->factor;
1798   mat->bs           = matin->bs;
1799   mat->assembled    = PETSC_TRUE;
1800   mat->insertmode   = NOT_SET_VALUES;
1801   mat->preallocated = PETSC_TRUE;
1802 
1803   a->rstart       = oldmat->rstart;
1804   a->rend         = oldmat->rend;
1805   a->cstart       = oldmat->cstart;
1806   a->cend         = oldmat->cend;
1807   a->size         = oldmat->size;
1808   a->rank         = oldmat->rank;
1809   a->donotstash   = oldmat->donotstash;
1810   a->roworiented  = oldmat->roworiented;
1811   a->rowindices   = 0;
1812   a->rowvalues    = 0;
1813   a->getrowactive = PETSC_FALSE;
1814 
1815   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1816   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1817   if (oldmat->colmap) {
1818 #if defined (PETSC_USE_CTABLE)
1819     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1820 #else
1821     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1822     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1823     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1824 #endif
1825   } else a->colmap = 0;
1826   if (oldmat->garray) {
1827     PetscInt len;
1828     len  = oldmat->B->n;
1829     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1830     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1831     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1832   } else a->garray = 0;
1833 
1834   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1835   PetscLogObjectParent(mat,a->lvec);
1836   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1837   PetscLogObjectParent(mat,a->Mvctx);
1838   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1839   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1840   PetscLogObjectParent(mat,a->A);
1841   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1842   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1843   PetscLogObjectParent(mat,a->B);
1844   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1845   *newmat = mat;
1846   PetscFunctionReturn(0);
1847 }
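
/*
   Example usage of MatDuplicate() (a minimal sketch): creates a new matrix
   with the same parallel layout and nonzero structure; MAT_COPY_VALUES also
   copies the numerical values, MAT_DO_NOT_COPY_VALUES leaves them zeroed:

     Mat C;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&C);CHKERRQ(ierr);
*/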
1848 
1849 #include "petscsys.h"
1850 
1851 #undef __FUNCT__
1852 #define __FUNCT__ "MatLoad_MPIAIJ"
1853 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1854 {
1855   Mat            A;
1856   PetscScalar    *vals,*svals;
1857   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1858   MPI_Status     status;
1859   PetscErrorCode ierr;
1860   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1861   PetscInt       i,nz,j,rstart,rend;
1862   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1863   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1864   PetscInt       cend,cstart,n,*rowners;
1865   int            fd;
1866 
1867   PetscFunctionBegin;
1868   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1869   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1870   if (!rank) {
1871     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1872     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1873     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1874   }
1875 
1876   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1877   M = header[1]; N = header[2];
1878   /* determine ownership of all rows */
1879   m    = M/size + ((M % size) > rank);
1880   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1881   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1882   rowners[0] = 0;
1883   for (i=2; i<=size; i++) {
1884     rowners[i] += rowners[i-1];
1885   }
1886   rstart = rowners[rank];
1887   rend   = rowners[rank+1];
1888 
1889   /* distribute row lengths to all processors */
1890   ierr    = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
1891   if (!rank) {
1892     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1893     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1894     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1895     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1896     for (j=0; j<m; j++) {
1897       procsnz[0] += ourlens[j];
1898     }
1899     for (i=1; i<size; i++) {
1900       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1901       /* calculate the number of nonzeros on each processor */
1902       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1903         procsnz[i] += rowlengths[j];
1904       }
1905       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1906     }
1907     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1908   } else {
1909     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1910   }
1911 
1912   if (!rank) {
1913     /* determine max buffer needed and allocate it */
1914     maxnz = 0;
1915     for (i=0; i<size; i++) {
1916       maxnz = PetscMax(maxnz,procsnz[i]);
1917     }
1918     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1919 
1920     /* read in my part of the matrix column indices  */
1921     nz   = procsnz[0];
1922     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1923     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1924 
1925     /* read in everyone else's parts and ship them off */
1926     for (i=1; i<size; i++) {
1927       nz   = procsnz[i];
1928       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1929       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1930     }
1931     ierr = PetscFree(cols);CHKERRQ(ierr);
1932   } else {
1933     /* determine buffer space needed for message */
1934     nz = 0;
1935     for (i=0; i<m; i++) {
1936       nz += ourlens[i];
1937     }
1938     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1939 
1940     /* receive message of column indices */
1941     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1942     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1943     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1944   }
1945 
1946   /* determine column ownership if matrix is not square */
1947   if (N != M) {
1948     n      = N/size + ((N % size) > rank);
1949     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1950     cstart = cend - n;
1951   } else {
1952     cstart = rstart;
1953     cend   = rend;
1954     n      = cend - cstart;
1955   }
1956 
1957   /* loop over local rows, determining the number of off-diagonal entries */
1958   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1959   jj = 0;
1960   for (i=0; i<m; i++) {
1961     for (j=0; j<ourlens[i]; j++) {
1962       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1963       jj++;
1964     }
1965   }
1966 
1967   /* create our matrix */
1968   for (i=0; i<m; i++) {
1969     ourlens[i] -= offlens[i];
1970   }
1971   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
1972   ierr = MatSetType(A,type);CHKERRQ(ierr);
1973   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
1974 
1975   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1976   for (i=0; i<m; i++) {
1977     ourlens[i] += offlens[i];
1978   }
1979 
1980   if (!rank) {
1981     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1982 
1983     /* read in my part of the matrix numerical values  */
1984     nz   = procsnz[0];
1985     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1986 
1987     /* insert into matrix */
1988     jj      = rstart;
1989     smycols = mycols;
1990     svals   = vals;
1991     for (i=0; i<m; i++) {
1992       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1993       smycols += ourlens[i];
1994       svals   += ourlens[i];
1995       jj++;
1996     }
1997 
1998     /* read in other processors and ship out */
1999     for (i=1; i<size; i++) {
2000       nz   = procsnz[i];
2001       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2002       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2003     }
2004     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2005   } else {
2006     /* receive numeric values */
2007     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2008 
2009     /* receive message of values */
2010     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2011     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2012     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2013 
2014     /* insert into matrix */
2015     jj      = rstart;
2016     smycols = mycols;
2017     svals   = vals;
2018     for (i=0; i<m; i++) {
2019       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2020       smycols += ourlens[i];
2021       svals   += ourlens[i];
2022       jj++;
2023     }
2024   }
2025   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2026   ierr = PetscFree(vals);CHKERRQ(ierr);
2027   ierr = PetscFree(mycols);CHKERRQ(ierr);
2028   ierr = PetscFree(rowners);CHKERRQ(ierr);
2029 
2030   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2031   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2032   *newmat = A;
2033   PetscFunctionReturn(0);
2034 }
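
/*
   Example usage of MatLoad() (a minimal sketch, assuming a file "matrix.dat"
   previously written with MatView() on a binary viewer and the
   PETSC_FILE_RDONLY open mode of this PETSc version):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/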
2035 
2036 #undef __FUNCT__
2037 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2038 /*
2039     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
2040   locally, and then the end result by concatenating the local matrices.
2041   Writing it directly would look much like MatGetSubMatrices_MPIAIJ()
2042 */
2043 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2044 {
2045   PetscErrorCode ierr;
2046   PetscMPIInt    rank,size;
2047   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2048   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2049   Mat            *local,M,Mreuse;
2050   PetscScalar    *vwork,*aa;
2051   MPI_Comm       comm = mat->comm;
2052   Mat_SeqAIJ     *aij;
2053 
2054 
2055   PetscFunctionBegin;
2056   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2057   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2058 
2059   if (call ==  MAT_REUSE_MATRIX) {
2060     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2061     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2062     local = &Mreuse;
2063     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2064   } else {
2065     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2066     Mreuse = *local;
2067     ierr   = PetscFree(local);CHKERRQ(ierr);
2068   }
2069 
2070   /*
2071       m - number of local rows
2072       n - number of columns (same on all processors)
2073       rstart - first row in new global matrix generated
2074   */
2075   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2076   if (call == MAT_INITIAL_MATRIX) {
2077     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2078     ii  = aij->i;
2079     jj  = aij->j;
2080 
2081     /*
2082         Determine the number of non-zeros in the diagonal and off-diagonal
2083         portions of the matrix in order to do correct preallocation
2084     */
2085 
2086     /* first get start and end of "diagonal" columns */
2087     if (csize == PETSC_DECIDE) {
2088       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2089       if (mglobal == n) { /* square matrix */
2090         nlocal = m;
2091       } else {
2092         nlocal = n/size + ((n % size) > rank);
2093       }
2094     } else {
2095       nlocal = csize;
2096     }
2097     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2098     rstart = rend - nlocal;
2099     if (rank == size - 1 && rend != n) {
2100       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2101     }
2102 
2103     /* next, compute all the lengths */
2104     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2105     olens = dlens + m;
2106     for (i=0; i<m; i++) {
2107       jend = ii[i+1] - ii[i];
2108       olen = 0;
2109       dlen = 0;
2110       for (j=0; j<jend; j++) {
2111         if (*jj < rstart || *jj >= rend) olen++;
2112         else dlen++;
2113         jj++;
2114       }
2115       olens[i] = olen;
2116       dlens[i] = dlen;
2117     }
2118     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2119     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2120     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2121     ierr = PetscFree(dlens);CHKERRQ(ierr);
2122   } else {
2123     PetscInt ml,nl;
2124 
2125     M = *newmat;
2126     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2127     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2128     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2129     /*
2130          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2131        rather than the slower MatSetValues().
2132     */
2133     M->was_assembled = PETSC_TRUE;
2134     M->assembled     = PETSC_FALSE;
2135   }
2136   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2137   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2138   ii  = aij->i;
2139   jj  = aij->j;
2140   aa  = aij->a;
2141   for (i=0; i<m; i++) {
2142     row   = rstart + i;
2143     nz    = ii[i+1] - ii[i];
2144     cwork = jj;     jj += nz;
2145     vwork = aa;     aa += nz;
2146     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2147   }
2148 
2149   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2150   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2151   *newmat = M;
2152 
2153   /* save submatrix used in processor for next request */
2154   if (call ==  MAT_INITIAL_MATRIX) {
2155     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2156     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2157   }
2158 
2159   PetscFunctionReturn(0);
2160 }
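
/*
   Example usage of MatGetSubMatrix() (a minimal sketch; nloc and first are
   hypothetical per-process values describing the rows/columns this process
   wants, and the new column distribution is left to PETSC_DECIDE):

     IS  isrow,iscol;
     Mat S;
     ierr = ISCreateStride(comm,nloc,first,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(comm,nloc,first,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
*/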
2161 
2162 EXTERN_C_BEGIN
2163 #undef __FUNCT__
2164 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2165 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2166 {
2167   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2168   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2169   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2170   const PetscInt *JJ;
2171   PetscScalar    *values;
2172   PetscErrorCode ierr;
2173 
2174   PetscFunctionBegin;
2175 #if defined(PETSC_OPT_g)
2176   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2177 #endif
2178   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2179   o_nnz = d_nnz + m;
2180 
2181   for (i=0; i<m; i++) {
2182     nnz     = I[i+1]- I[i];
2183     JJ      = J + I[i];
2184     nnz_max = PetscMax(nnz_max,nnz);
2185 #if defined(PETSC_OPT_g)
2186     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2187 #endif
2188     for (j=0; j<nnz; j++) {
2189       if (*JJ >= cstart) break;
2190       JJ++;
2191     }
2192     d = 0;
2193     for (; j<nnz; j++) {
2194       if (*JJ++ >= cend) break;
2195       d++;
2196     }
2197     d_nnz[i] = d;
2198     o_nnz[i] = nnz - d;
2199   }
2200   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2201   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2202 
2203   if (v) values = (PetscScalar*)v;
2204   else {
2205     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2206     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2207   }
2208 
2209   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2210   for (i=0; i<m; i++) {
2211     ii   = i + rstart;
2212     nnz  = I[i+1]- I[i];
2213     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2214   }
2215   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2216   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2217   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2218 
2219   if (!v) {
2220     ierr = PetscFree(values);CHKERRQ(ierr);
2221   }
2222   PetscFunctionReturn(0);
2223 }
2224 EXTERN_C_END
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2228 /*@C
2229    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2230    (the default parallel PETSc format).
2231 
2232    Collective on MPI_Comm
2233 
2234    Input Parameters:
2235 +  A - the matrix
2236 .  i - the indices into j for the start of each local row (starts with zero)
2237 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2238 -  v - optional values in the matrix
2239 
2240    Level: developer
2241 
2242 .keywords: matrix, aij, compressed row, sparse, parallel
2243 
2244 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2245 @*/
2246 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2247 {
2248   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2249 
2250   PetscFunctionBegin;
2251   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2252   if (f) {
2253     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2254   }
2255   PetscFunctionReturn(0);
2256 }
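
/*
   Example input for MatMPIAIJSetPreallocationCSR() (a minimal sketch): a
   process owning two local rows whose global column entries are {0,2} and
   {1,3} would pass the local CSR triple

     PetscInt    i[] = {0,2,4};            row pointers; i[0] must be 0
     PetscInt    j[] = {0,2,1,3};          column indices, sorted per row
     PetscScalar v[] = {1.0,2.0,3.0,4.0};  optional values

     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/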
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2260 /*@C
2261    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2262    (the default parallel PETSc format).  For good matrix assembly performance
2263    the user should preallocate the matrix storage by setting the parameters
2264    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2265    performance can be increased by more than a factor of 50.
2266 
2267    Collective on MPI_Comm
2268 
2269    Input Parameters:
2270 +  A - the matrix
2271 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2272            (same value is used for all local rows)
2273 .  d_nnz - array containing the number of nonzeros in the various rows of the
2274            DIAGONAL portion of the local submatrix (possibly different for each row)
2275            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2276           The size of this array is equal to the number of local rows, i.e. 'm'.
2277            You must leave room for the diagonal entry even if it is zero.
2278 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2279            submatrix (same value is used for all local rows).
2280 -  o_nnz - array containing the number of nonzeros in the various rows of the
2281            OFF-DIAGONAL portion of the local submatrix (possibly different for
2282            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2283            structure. The size of this array is equal to the number
2284            of local rows, i.e. 'm'.
2285 
2286    If the *_nnz parameter is given then the *_nz parameter is ignored
2287 
2288    The AIJ format (also called the Yale sparse matrix format or
2289    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2290    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2291 
2292    The parallel matrix is partitioned such that the first m0 rows belong to
2293    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2294    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2295 
2296    The DIAGONAL portion of the local submatrix of a processor can be defined
2297    as the submatrix which is obtained by extracting the part corresponding
2298    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2299    first row that belongs to the processor, and r2 is the last row belonging
2300    to this processor. This is a square mxm matrix. The remaining portion
2301    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2302 
2303    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2304 
2305    Example usage:
2306 
2307    Consider the following 8x8 matrix with 34 non-zero values, that is
2308    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2309    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2310    as follows:
2311 
2312 .vb
2313             1  2  0  |  0  3  0  |  0  4
2314     Proc0   0  5  6  |  7  0  0  |  8  0
2315             9  0 10  | 11  0  0  | 12  0
2316     -------------------------------------
2317            13  0 14  | 15 16 17  |  0  0
2318     Proc1   0 18  0  | 19 20 21  |  0  0
2319             0  0  0  | 22 23  0  | 24  0
2320     -------------------------------------
2321     Proc2  25 26 27  |  0  0 28  | 29  0
2322            30  0  0  | 31 32 33  |  0 34
2323 .ve
2324 
2325    This can be represented as a collection of submatrices as:
2326 
2327 .vb
2328       A B C
2329       D E F
2330       G H I
2331 .ve
2332 
2333    Where the submatrices A,B,C are owned by proc0, D,E,F are
2334    owned by proc1, G,H,I are owned by proc2.
2335 
2336    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2337    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2338    The 'M','N' parameters are 8,8, and have the same values on all procs.
2339 
2340    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2341    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2342    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2343    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2344    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2345    matrix, and [DF] as another SeqAIJ matrix.
2346 
2347    When d_nz, o_nz parameters are specified, d_nz storage elements are
2348    allocated for every row of the local diagonal submatrix, and o_nz
2349    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2350    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2351    local row in each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2352    In this case, the values of d_nz,o_nz are:
2353 .vb
2354      proc0 : d_nz = 2, o_nz = 2
2355      proc1 : d_nz = 3, o_nz = 2
2356      proc2 : d_nz = 1, o_nz = 4
2357 .ve
2358    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2359    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2360    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2361    34 values.
2362 
2363    When d_nnz, o_nnz parameters are specified, the storage is specified
2364    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2365    In the above case the values for d_nnz,o_nnz are:
2366 .vb
2367      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2368      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2369      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2370 .ve
2371    Here the space allocated is the sum of all the above values, i.e. 34, and
2372    hence pre-allocation is perfect.
2373 
2374    Level: intermediate
2375 
2376 .keywords: matrix, aij, compressed row, sparse, parallel
2377 
2378 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2379           MPIAIJ
2380 @*/
2381 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2382 {
2383   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2384 
2385   PetscFunctionBegin;
2386   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2387   if (f) {
2388     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2389   }
2390   PetscFunctionReturn(0);
2391 }
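
/*
   Example call (a minimal sketch) matching the 8x8 example in the manual
   page above, as issued on proc1:

     PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/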
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatCreateMPIAIJ"
2395 /*@C
2396    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2397    (the default parallel PETSc format).  For good matrix assembly performance
2398    the user should preallocate the matrix storage by setting the parameters
2399    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2400    performance can be increased by more than a factor of 50.
2401 
2402    Collective on MPI_Comm
2403 
2404    Input Parameters:
2405 +  comm - MPI communicator
2406 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2407            This value should be the same as the local size used in creating the
2408            y vector for the matrix-vector product y = Ax.
2409 .  n - This value should be the same as the local size used in creating the
2410        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2411        it calculated if N is given). For square matrices n is almost always m.
2412 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2413 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2414 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2415            (same value is used for all local rows)
2416 .  d_nnz - array containing the number of nonzeros in the various rows of the
2417            DIAGONAL portion of the local submatrix (possibly different for each row)
2418            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2419            The size of this array is equal to the number of local rows, i.e. 'm'.
2420            You must leave room for the diagonal entry even if it is zero.
2421 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2422            submatrix (same value is used for all local rows).
2423 -  o_nnz - array containing the number of nonzeros in the various rows of the
2424            OFF-DIAGONAL portion of the local submatrix (possibly different for
2425            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2426            structure. The size of this array is equal to the number
2427            of local rows, i.e. 'm'.
2428 
2429    Output Parameter:
2430 .  A - the matrix
2431 
2432    Notes:
2433    If the *_nnz parameter is given then the *_nz parameter is ignored
2434 
2435    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2436    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2437    storage requirements for this matrix.
2438 
2439    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2440    processor then it must be used on all processors that share the object for
2441    that argument.
2442 
2443    The user MUST specify either the local or global matrix dimensions
2444    (possibly both).
2445 
2446    The parallel matrix is partitioned such that the first m0 rows belong to
2447    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2448    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2449 
2450    The DIAGONAL portion of the local submatrix of a processor can be defined
2451    as the submatrix which is obtained by extracting the part corresponding
2452    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2453    first row that belongs to the processor, and r2 is the last row belonging
2454    to this processor. This is a square mxm matrix. The remaining portion
2455    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2456 
2457    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2458 
2459    When calling this routine with a single process communicator, a matrix of
2460    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2461    type of communicator, use the construction mechanism:
2462      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2463 
2464    By default, this format uses inodes (identical nodes) when possible.
2465    We search for consecutive rows with the same nonzero structure, thereby
2466    reusing matrix information to achieve increased efficiency.
2467 
2468    Options Database Keys:
2469 +  -mat_aij_no_inode  - Do not use inodes
2470 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2471 -  -mat_aij_oneindex - Internally use indexing starting at 1
2472         rather than 0.  Note that when calling MatSetValues(),
2473         the user still MUST index entries starting at 0!
2474 
2475 
2476    Example usage:
2477 
2478    Consider the following 8x8 matrix with 34 non-zero values, that is
2479    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2480    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2481    as follows:
2482 
2483 .vb
2484             1  2  0  |  0  3  0  |  0  4
2485     Proc0   0  5  6  |  7  0  0  |  8  0
2486             9  0 10  | 11  0  0  | 12  0
2487     -------------------------------------
2488            13  0 14  | 15 16 17  |  0  0
2489     Proc1   0 18  0  | 19 20 21  |  0  0
2490             0  0  0  | 22 23  0  | 24  0
2491     -------------------------------------
2492     Proc2  25 26 27  |  0  0 28  | 29  0
2493            30  0  0  | 31 32 33  |  0 34
2494 .ve
2495 
2496    This can be represented as a collection of submatrices as:
2497 
2498 .vb
2499       A B C
2500       D E F
2501       G H I
2502 .ve
2503 
2504    Where the submatrices A,B,C are owned by proc0, D,E,F are
2505    owned by proc1, G,H,I are owned by proc2.
2506 
2507    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2508    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2509    The 'M','N' parameters are 8,8, and have the same values on all procs.
2510 
2511    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2512    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2513    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2514    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2515    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2516    matrix, and [DF] as another SeqAIJ matrix.
2517 
2518    When d_nz, o_nz parameters are specified, d_nz storage elements are
2519    allocated for every row of the local diagonal submatrix, and o_nz
2520    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2521    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2522    local row in each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2523    In this case, the values of d_nz,o_nz are:
2524 .vb
2525      proc0 : d_nz = 2, o_nz = 2
2526      proc1 : d_nz = 3, o_nz = 2
2527      proc2 : d_nz = 1, o_nz = 4
2528 .ve
2529    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2530    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2531    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2532    34 values.
2533 
2534    When d_nnz, o_nnz parameters are specified, the storage is specified
2535    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2536    In the above case the values for d_nnz,o_nnz are:
2537 .vb
2538      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2539      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2540      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2541 .ve
2542    Here the space allocated is the sum of all the above values, i.e. 34, and
2543    hence pre-allocation is perfect.
2544 
2545    Level: intermediate
2546 
2547 .keywords: matrix, aij, compressed row, sparse, parallel
2548 
2549 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2550           MPIAIJ
2551 @*/
2552 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2553 {
2554   PetscErrorCode ierr;
2555   PetscMPIInt    size;
2556 
2557   PetscFunctionBegin;
2558   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2559   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2560   if (size > 1) {
2561     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2562     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2563   } else {
2564     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2565     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2566   }
2567   PetscFunctionReturn(0);
2568 }
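
/*
   Example call (a minimal sketch; mloc is a hypothetical name for the number
   of rows owned by this process, and d_nnz/o_nnz are per-row counts as in
   the manual page above):

     Mat A;
     ierr = MatCreateMPIAIJ(comm,mloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,
                            0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
*/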
2569 
2570 #undef __FUNCT__
2571 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2572 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2573 {
2574   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2575 
2576   PetscFunctionBegin;
2577   *Ad     = a->A;
2578   *Ao     = a->B;
2579   *colmap = a->garray;
2580   PetscFunctionReturn(0);
2581 }
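
/*
   Example usage of MatMPIAIJGetSeqAIJ() (a minimal sketch): exposes the two
   sequential blocks stored on this process; local column c of the
   off-diagonal block Ao corresponds to global column colmap[c]:

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/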
2582 
2583 #undef __FUNCT__
2584 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2585 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2586 {
2587   PetscErrorCode ierr;
2588   PetscInt       i;
2589   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2590 
2591   PetscFunctionBegin;
2592   if (coloring->ctype == IS_COLORING_LOCAL) {
2593     ISColoringValue *allcolors,*colors;
2594     ISColoring      ocoloring;
2595 
2596     /* set coloring for diagonal portion */
2597     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2598 
2599     /* set coloring for off-diagonal portion */
2600     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2601     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2602     for (i=0; i<a->B->n; i++) {
2603       colors[i] = allcolors[a->garray[i]];
2604     }
2605     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2606     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2607     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2608     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2609   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2610     ISColoringValue *colors;
2611     PetscInt        *larray;
2612     ISColoring      ocoloring;
2613 
2614     /* set coloring for diagonal portion */
2615     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2616     for (i=0; i<a->A->n; i++) {
2617       larray[i] = i + a->cstart;
2618     }
2619     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2620     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2621     for (i=0; i<a->A->n; i++) {
2622       colors[i] = coloring->colors[larray[i]];
2623     }
2624     ierr = PetscFree(larray);CHKERRQ(ierr);
2625     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2626     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2627     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2628 
2629     /* set coloring for off-diagonal portion */
2630     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2631     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2632     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2633     for (i=0; i<a->B->n; i++) {
2634       colors[i] = coloring->colors[larray[i]];
2635     }
2636     ierr = PetscFree(larray);CHKERRQ(ierr);
2637     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2638     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2639     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2640   } else {
2641     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2642   }
2643 
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 #if defined(PETSC_HAVE_ADIC)
2648 #undef __FUNCT__
2649 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2650 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2651 {
2652   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2653   PetscErrorCode ierr;
2654 
2655   PetscFunctionBegin;
2656   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2657   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2658   PetscFunctionReturn(0);
2659 }
2660 #endif
2661 
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2664 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2665 {
2666   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2667   PetscErrorCode ierr;
2668 
2669   PetscFunctionBegin;
2670   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2671   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 #undef __FUNCT__
2676 #define __FUNCT__ "MatMerge"
2677 /*@C
2678       MatMerge - Creates a single large PETSc matrix by concatenating the
2679                  sequential matrices from each processor
2680 
2681     Collective on MPI_Comm
2682 
2683    Input Parameters:
2684 +    comm - the communicator the parallel matrix will live on
2685 .    inmat - the input sequential matrix (one per process)
2686 .    n - number of local columns (or PETSC_DECIDE)
2687 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2688 
2689    Output Parameter:
2690 .    outmat - the parallel matrix generated
2691 
2692     Level: advanced
2693 
2694    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2695 
2696 @*/
2697 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2698 {
2699   PetscErrorCode ierr;
2700   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2701   PetscInt       *indx;
2702   PetscScalar    *values;
2703   PetscMap       columnmap,rowmap;
2704 
2705   PetscFunctionBegin;
2706   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2707   /*
2708   PetscMPIInt       rank;
2709   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2710   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2711   */
2712   if (scall == MAT_INITIAL_MATRIX){
2713     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2714     if (n == PETSC_DECIDE){
2715       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2716       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2717       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2718       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2719       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2720     }
2721 
2722     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2723     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2724     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2725     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2726     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2727 
2728     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2729     for (i=0;i<m;i++) {
2730       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2731       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2732       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2733     }
2734     /* This routine will ONLY return MPIAIJ type matrix */
2735     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2736     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2737     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2738     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2739 
2740   } else if (scall == MAT_REUSE_MATRIX){
2741     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2742   } else {
2743     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2744   }
2745 
2746   for (i=0;i<m;i++) {
2747     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2748     I    = i + rstart;
2749     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2750     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2751   }
2752   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2753   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2754   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2755 
2756   PetscFunctionReturn(0);
2757 }
2758 
2759 #undef __FUNCT__
2760 #define __FUNCT__ "MatFileSplit"
2761 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2762 {
2763   PetscErrorCode    ierr;
2764   PetscMPIInt       rank;
2765   PetscInt          m,N,i,rstart,nnz;
2766   size_t            len;
2767   const PetscInt    *indx;
2768   PetscViewer       out;
2769   char              *name;
2770   Mat               B;
2771   const PetscScalar *values;
2772 
2773   PetscFunctionBegin;
2774   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2775   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2776   /* Should this be the type of the diagonal block of A? */
2777   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2778   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2779   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2780   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2781   for (i=0;i<m;i++) {
2782     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2783     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2784     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2785   }
2786   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2787   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2788 
2789   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2790   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2791   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2792   sprintf(name,"%s.%d",outfile,rank);
2793   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2794   ierr = PetscFree(name);CHKERRQ(ierr);
2795   ierr = MatView(B,out);CHKERRQ(ierr);
2796   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2797   ierr = MatDestroy(B);CHKERRQ(ierr);
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2802 #undef __FUNCT__
2803 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2804 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2805 {
2806   PetscErrorCode       ierr;
2807   Mat_Merge_SeqsToMPI  *merge;
2808   PetscObjectContainer container;
2809 
2810   PetscFunctionBegin;
2811   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2812   if (container) {
2813     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2814     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2815     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2816     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2817     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2818     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2819     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2820     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2821     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2822     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2823     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2824     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2825 
2826     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2827     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2828     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is valid only when the container exists */
2829   }
2830 
2831   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 #include "src/mat/utils/freespace.h"
2836 #include "petscbt.h"
2837 #undef __FUNCT__
2838 #define __FUNCT__ "MatMerge_SeqsToMPI"
2839 /*@C
2840       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding the sequential
2841                  matrices from each processor
2842 
2843     Collective on MPI_Comm
2844 
2845    Input Parameters:
2846 +    comm - the communicator the parallel matrix will live on
2847 .    seqmat - the input sequential matrix (one per process)
2848 .    m - number of local rows (or PETSC_DECIDE)
2849 .    n - number of local columns (or PETSC_DECIDE)
2850 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2851 
2852    Output Parameter:
2853 .    mpimat - the parallel matrix generated
2854 
2855     Level: advanced
2856 
2857    Notes:
2858      The dimensions of the sequential matrix in each processor MUST be the same.
2859      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI" and will be
2860      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2861 @*/
2862 static PetscEvent logkey_seqstompinum = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2863 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2864 {
2865   PetscErrorCode       ierr;
2866   MPI_Comm             comm=mpimat->comm;
2867   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2868   PetscMPIInt          size,rank,taga,*len_s;
2869   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2870   PetscInt             proc,m;
2871   PetscInt             **buf_ri,**buf_rj;
2872   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2873   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2874   MPI_Request          *s_waits,*r_waits;
2875   MPI_Status           *status;
2876   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2877   Mat_Merge_SeqsToMPI  *merge;
2878   PetscObjectContainer container;
2879 
2880   PetscFunctionBegin;
2881   if (!logkey_seqstompinum) {
2882     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2883   }
2884   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2885 
2886   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2887   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2888 
2889   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2890   if (container) {
2891     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2892   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat must be created by MatMerge_SeqsToMPISymbolic()");
2893   bi     = merge->bi;
2894   bj     = merge->bj;
2895   buf_ri = merge->buf_ri;
2896   buf_rj = merge->buf_rj;
2897 
2898   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2899   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2900   len_s  = merge->len_s;
2901 
2902   /* send and recv matrix values */
2903   /*-----------------------------*/
2904   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2905   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2906 
2907   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2908   for (proc=0,k=0; proc<size; proc++){
2909     if (!len_s[proc]) continue;
2910     i = owners[proc];
2911     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2912     k++;
2913   }
2914 
2915   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2916   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2917   ierr = PetscFree(status);CHKERRQ(ierr);
2918 
2919   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2920   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2921 
2922   /* insert mat values of mpimat */
2923   /*----------------------------*/
2924   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2925   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2926   nextrow = buf_ri_k + merge->nrecv;
2927   nextai  = nextrow + merge->nrecv;
2928 
2929   for (k=0; k<merge->nrecv; k++){
2930     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2931     nrows = *(buf_ri_k[k]);
2932     nextrow[k]  = buf_ri_k[k]+1;  /* next row number in the k-th received i-structure */
2933     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure entry of the k-th received message */
2934   }
2935 
2936   /* set values of ba */
2937   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2938   for (i=0; i<m; i++) {
2939     arow = owners[rank] + i;
2940     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2941     bnzi = bi[i+1] - bi[i];
2942     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2943 
2944     /* add local non-zero vals of this proc's seqmat into ba */
2945     anzi = ai[arow+1] - ai[arow];
2946     aj   = a->j + ai[arow];
2947     aa   = a->a + ai[arow];
2948     nextaj = 0;
2949     for (j=0; nextaj<anzi; j++){
2950       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2951         ba_i[j] += aa[nextaj++];
2952       }
2953     }
2954 
2955     /* add received vals into ba */
2956     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2957       /* i-th row */
2958       if (i == *nextrow[k]) {
2959         anzi = *(nextai[k]+1) - *nextai[k];
2960         aj   = buf_rj[k] + *(nextai[k]);
2961         aa   = abuf_r[k] + *(nextai[k]);
2962         nextaj = 0;
2963         for (j=0; nextaj<anzi; j++){
2964           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2965             ba_i[j] += aa[nextaj++];
2966           }
2967         }
2968         nextrow[k]++; nextai[k]++;
2969       }
2970     }
2971     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
2972   }
2973   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2974   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2975 
2976   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
2977   ierr = PetscFree(ba_i);CHKERRQ(ierr);
2978   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
2979   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2980   PetscFunctionReturn(0);
2981 }
2982 static PetscEvent logkey_seqstompisym = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
2983 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
2984 {
2985   PetscErrorCode       ierr;
2986   Mat                  B_mpi;
2987   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2988   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
2989   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
2990   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
2991   PetscInt             len,proc,*dnz,*onz;
2992   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
2993   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
2994   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
2995   MPI_Status           *status;
2996   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
2997   PetscBT              lnkbt;
2998   Mat_Merge_SeqsToMPI  *merge;
2999   PetscObjectContainer container;
3000 
3001   PetscFunctionBegin;
3002   if (!logkey_seqstompisym) {
3003     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3004   }
3005   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3006 
3007   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3008   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3009 
3010   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3011   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3012 
3013   /* determine row ownership */
3014   /*---------------------------------------------------------*/
3015   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3016   if (m == PETSC_DECIDE) {
3017     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3018   } else {
3019     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3020   }
3021   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3022   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3023   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3024 
3025   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3026   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3027 
3028   /* determine the number of messages to send, their lengths */
3029   /*---------------------------------------------------------*/
3030   len_s  = merge->len_s;
3031 
3032   len = 0;  /* length of buf_si[] */
3033   merge->nsend = 0;
3034   for (proc=0; proc<size; proc++){
3035     len_si[proc] = 0;
3036     if (proc == rank){
3037       len_s[proc] = 0;
3038     } else {
3039       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3040       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3041     }
3042     if (len_s[proc]) {
3043       merge->nsend++;
3044       nrows = 0;
3045       for (i=owners[proc]; i<owners[proc+1]; i++){
3046         if (ai[i+1] > ai[i]) nrows++;
3047       }
3048       len_si[proc] = 2*(nrows+1);
3049       len += len_si[proc];
3050     }
3051   }
3052 
3053   /* determine the number and length of messages to receive for ij-structure */
3054   /*-------------------------------------------------------------------------*/
3055   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3056   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3057 
3058   /* post the Irecv of j-structure */
3059   /*-------------------------------*/
3060   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3061   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3062 
3063   /* post the Isend of j-structure */
3064   /*--------------------------------*/
3065   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3066   sj_waits = si_waits + merge->nsend;
3067 
3068   for (proc=0, k=0; proc<size; proc++){
3069     if (!len_s[proc]) continue;
3070     i = owners[proc];
3071     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3072     k++;
3073   }
3074 
3075   /* receives and sends of j-structure are complete */
3076   /*------------------------------------------------*/
3077   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3078   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3079 
3080   /* send and recv i-structure */
3081   /*---------------------------*/
3082   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3083   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3084 
3085   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3086   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3087   for (proc=0,k=0; proc<size; proc++){
3088     if (!len_s[proc]) continue;
3089     /* form outgoing message for i-structure:
3090          buf_si[0]:                 nrows to be sent
3091                [1:nrows]:           row indices (local to the receiving process)
3092                [nrows+1:2*nrows+1]: i-structure index
3093     */
3094     /*-------------------------------------------*/
3095     nrows = len_si[proc]/2 - 1;
3096     buf_si_i    = buf_si + nrows+1;
3097     buf_si[0]   = nrows;
3098     buf_si_i[0] = 0;
3099     nrows = 0;
3100     for (i=owners[proc]; i<owners[proc+1]; i++){
3101       anzi = ai[i+1] - ai[i];
3102       if (anzi) {
3103         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3104         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3105         nrows++;
3106       }
3107     }
3108     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3109     k++;
3110     buf_si += len_si[proc];
3111   }
3112 
3113   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3114   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3115 
3116   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3117   for (i=0; i<merge->nrecv; i++){
3118     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3119   }
3120 
3121   ierr = PetscFree(len_si);CHKERRQ(ierr);
3122   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3123   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3124   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3125   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3126   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3127   ierr = PetscFree(status);CHKERRQ(ierr);
3128 
3129   /* compute a local seq matrix in each processor */
3130   /*----------------------------------------------*/
3131   /* allocate bi array and free space for accumulating nonzero column info */
3132   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3133   bi[0] = 0;
3134 
3135   /* create and initialize a linked list */
3136   nlnk = N+1;
3137   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3138 
3139   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3140   /* number of local nonzeros in seqmat */
3141   len  = ai[owners[rank+1]] - ai[owners[rank]];
3142   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3143   current_space = free_space;
3144 
3145   /* determine symbolic info for each local row */
3146   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3147   nextrow = buf_ri_k + merge->nrecv;
3148   nextai  = nextrow + merge->nrecv;
3149   for (k=0; k<merge->nrecv; k++){
3150     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3151     nrows = *buf_ri_k[k];
3152     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number in the k-th received i-structure */
3153     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure entry of the k-th received message */
3154   }
3155 
3156   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3157   len = 0;
3158   for (i=0;i<m;i++) {
3159     bnzi   = 0;
3160     /* add local non-zero cols of this proc's seqmat into lnk */
3161     arow   = owners[rank] + i;
3162     anzi   = ai[arow+1] - ai[arow];
3163     aj     = a->j + ai[arow];
3164     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3165     bnzi += nlnk;
3166     /* add received col data into lnk */
3167     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3168       if (i == *nextrow[k]) { /* i-th row */
3169         anzi = *(nextai[k]+1) - *nextai[k];
3170         aj   = buf_rj[k] + *nextai[k];
3171         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3172         bnzi += nlnk;
3173         nextrow[k]++; nextai[k]++;
3174       }
3175     }
3176     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3177 
3178     /* if free space is not available, make more free space */
3179     if (current_space->local_remaining<bnzi) {
3180       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3181       nspacedouble++;
3182     }
3183     /* copy data into free space, then initialize lnk */
3184     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3185     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3186 
3187     current_space->array           += bnzi;
3188     current_space->local_used      += bnzi;
3189     current_space->local_remaining -= bnzi;
3190 
3191     bi[i+1] = bi[i] + bnzi;
3192   }
3193 
3194   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3195 
3196   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3197   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3198   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3199 
3200   /* create symbolic parallel matrix B_mpi */
3201   /*---------------------------------------*/
3202   if (n==PETSC_DECIDE) {
3203     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3204   } else {
3205     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3206   }
3207   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3208   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3209   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3210 
3211   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3212   B_mpi->assembled     = PETSC_FALSE;
3213   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3214   merge->bi            = bi;
3215   merge->bj            = bj;
3216   merge->buf_ri        = buf_ri;
3217   merge->buf_rj        = buf_rj;
3218   merge->coi           = PETSC_NULL;
3219   merge->coj           = PETSC_NULL;
3220   merge->owners_co     = PETSC_NULL;
3221 
3222   /* attach the supporting struct to B_mpi for reuse */
3223   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3224   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3225   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3226   *mpimat = B_mpi;
3227   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3228   PetscFunctionReturn(0);
3229 }
3230 
3231 static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
3232 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3233 {
3234   PetscErrorCode   ierr;
3235 
3236   PetscFunctionBegin;
3237   if (!logkey_seqstompi) {
3238     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3239   }
3240   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3241   if (scall == MAT_INITIAL_MATRIX){
3242     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3243   }
3244   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3245   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3246   PetscFunctionReturn(0);
3247 }
3248 static PetscEvent logkey_getlocalmat = 0;
3249 #undef __FUNCT__
3250 #define __FUNCT__ "MatGetLocalMat"
3251 /*@C
3252      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A
3253 
3254     Not Collective
3255 
3256    Input Parameters:
3257 +    A - the matrix
3258 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3259 
3260    Output Parameter:
3261 .    A_loc - the local sequential matrix generated
3262 
3263     Level: developer
3264 
3265 @*/
3266 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3267 {
3268   PetscErrorCode  ierr;
3269   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3270   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3271   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3272   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3273   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3274   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3275 
3276   PetscFunctionBegin;
3277   if (!logkey_getlocalmat) {
3278     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3279   }
3280   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3281   if (scall == MAT_INITIAL_MATRIX){
3282     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3283     ci[0] = 0;
3284     for (i=0; i<am; i++){
3285       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3286     }
3287     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3288     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3289     k = 0;
3290     for (i=0; i<am; i++) {
3291       ncols_o = bi[i+1] - bi[i];
3292       ncols_d = ai[i+1] - ai[i];
3293       /* off-diagonal portion of A */
3294       for (jo=0; jo<ncols_o; jo++) {
3295         col = cmap[*bj];
3296         if (col >= cstart) break;
3297         cj[k]   = col; bj++;
3298         ca[k++] = *ba++;
3299       }
3300       /* diagonal portion of A */
3301       for (j=0; j<ncols_d; j++) {
3302         cj[k]   = cstart + *aj++;
3303         ca[k++] = *aa++;
3304       }
3305       /* off-diagonal portion of A */
3306       for (j=jo; j<ncols_o; j++) {
3307         cj[k]   = cmap[*bj++];
3308         ca[k++] = *ba++;
3309       }
3310     }
3311     /* put together the new matrix */
3312     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3313     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3314     /* Since these are PETSc arrays, change flags to free them as necessary. */
3315     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3316     mat->freedata = PETSC_TRUE;
3317     mat->nonew    = 0;
3318   } else if (scall == MAT_REUSE_MATRIX){
3319     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3320     ci = mat->i; cj = mat->j; ca = mat->a;
3321     for (i=0; i<am; i++) {
3322       /* off-diagonal portion of A */
3323       ncols_o = bi[i+1] - bi[i];
3324       for (jo=0; jo<ncols_o; jo++) {
3325         col = cmap[*bj];
3326         if (col >= cstart) break;
3327         *ca++ = *ba++; bj++;
3328       }
3329       /* diagonal portion of A */
3330       ncols_d = ai[i+1] - ai[i];
3331       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3332       /* off-diagonal portion of A */
3333       for (j=jo; j<ncols_o; j++) {
3334         *ca++ = *ba++; bj++;
3335       }
3336     }
3337   } else {
3338     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3339   }
3340 
3341   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3342   PetscFunctionReturn(0);
3343 }
3344 
3345 static PetscEvent logkey_getlocalmatcondensed = 0;
3346 #undef __FUNCT__
3347 #define __FUNCT__ "MatGetLocalMatCondensed"
3348 /*@C
3349      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and NON-ZERO columns of A
3350 
3351     Not Collective
3352 
3353    Input Parameters:
3354 +    A - the matrix
3355 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3356 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3357 
3358    Output Parameter:
3359 .    A_loc - the local sequential matrix generated
3360 
3361     Level: developer
3362 
3363 @*/
3364 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3365 {
3366   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3367   PetscErrorCode    ierr;
3368   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3369   IS                isrowa,iscola;
3370   Mat               *aloc;
3371 
3372   PetscFunctionBegin;
3373   if (!logkey_getlocalmatcondensed) {
3374     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3375   }
3376   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3377   if (!row){
3378     start = a->rstart; end = a->rend;
3379     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3380   } else {
3381     isrowa = *row;
3382   }
3383   if (!col){
3384     start = a->cstart;
3385     cmap  = a->garray;
3386     nzA   = a->A->n;
3387     nzB   = a->B->n;
3388     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3389     ncols = 0;
3390     for (i=0; i<nzB; i++) {
3391       if (cmap[i] < start) idx[ncols++] = cmap[i];
3392       else break;
3393     }
3394     imark = i;
3395     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3396     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3397     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3398     ierr = PetscFree(idx);CHKERRQ(ierr);
3399   } else {
3400     iscola = *col;
3401   }
3402   if (scall != MAT_INITIAL_MATRIX){
3403     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3404     aloc[0] = *A_loc;
3405   }
3406   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3407   *A_loc = aloc[0];
3408   ierr = PetscFree(aloc);CHKERRQ(ierr);
3409   if (!row){
3410     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3411   }
3412   if (!col){
3413     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3414   }
3415   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3416   PetscFunctionReturn(0);
3417 }
3418 
3419 static PetscEvent logkey_GetBrowsOfAcols = 0;
3420 #undef __FUNCT__
3421 #define __FUNCT__ "MatGetBrowsOfAcols"
3422 /*@C
3423     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
3424 
3425     Collective on Mat
3426 
3427    Input Parameters:
3428 +    A,B - the matrices in mpiaij format
3429 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3430 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3431 
3432    Output Parameter:
3433 +    rowb, colb - index sets of rows and columns of B to extract
3434 .    brstart - index of the first row of B_seq that is taken from B's own local rows
3435 -    B_seq - the sequential matrix generated
3436 
3437     Level: developer
3438 
3439 @*/
3440 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3441 {
3442   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3443   PetscErrorCode    ierr;
3444   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3445   IS                isrowb,iscolb;
3446   Mat               *bseq;
3447 
3448   PetscFunctionBegin;
3449   if (a->cstart != b->rstart || a->cend != b->rend){
3450     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3451   }
3452   if (!logkey_GetBrowsOfAcols) {
3453     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3454   }
3455   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3456 
3457   if (scall == MAT_INITIAL_MATRIX){
3458     start = a->cstart;
3459     cmap  = a->garray;
3460     nzA   = a->A->n;
3461     nzB   = a->B->n;
3462     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3463     ncols = 0;
3464     for (i=0; i<nzB; i++) {  /* row < local row index */
3465       if (cmap[i] < start) idx[ncols++] = cmap[i];
3466       else break;
3467     }
3468     imark = i;
3469     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3470     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3471     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3472     ierr = PetscFree(idx);CHKERRQ(ierr);
3473     *brstart = imark;
3474     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3475   } else {
3476     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3477     isrowb = *rowb; iscolb = *colb;
3478     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3479     bseq[0] = *B_seq;
3480   }
3481   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3482   *B_seq = bseq[0];
3483   ierr = PetscFree(bseq);CHKERRQ(ierr);
3484   if (!rowb){
3485     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3486   } else {
3487     *rowb = isrowb;
3488   }
3489   if (!colb){
3490     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3491   } else {
3492     *colb = iscolb;
3493   }
3494   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3495   PetscFunctionReturn(0);
3496 }
3497 
3498 static PetscEvent logkey_GetBrowsOfAocols = 0;
3499 #undef __FUNCT__
3500 #define __FUNCT__ "MatGetBrowsOfAoCols"
3501 /*@C
3502     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to
3503     the nonzero columns of the OFF-DIAGONAL portion of the local part of A
3504 
3505     Collective on Mat
3506 
3507    Input Parameters:
3508 +    A,B - the matrices in mpiaij format
3509 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3510 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3511 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3512 
3513    Output Parameter:
3514 .    B_oth - the sequential matrix generated
3515 
3516     Level: developer
3517 
3518 @*/
3519 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3520 {
3521   VecScatter_MPI_General *gen_to,*gen_from;
3522   PetscErrorCode         ierr;
3523   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3524   Mat_SeqAIJ             *b_oth;
3525   VecScatter             ctx=a->Mvctx;
3526   MPI_Comm               comm=ctx->comm;
3527   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3528   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3529   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3530   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3531   MPI_Request            *rwaits,*swaits;
3532   MPI_Status             *sstatus,rstatus;
3533   PetscInt               *cols;
3534   PetscScalar            *vals;
3535   PetscMPIInt            j;
3536 
3537   PetscFunctionBegin;
3538   if (a->cstart != b->rstart || a->cend != b->rend){
3539     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3540   }
3541   if (!logkey_GetBrowsOfAocols) {
3542     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3543   }
3544   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3545   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3546 
3547   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3548   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3549   rvalues  = gen_from->values; /* buffer for the incoming row lengths */
3550   svalues  = gen_to->values;   /* buffer for the outgoing row lengths */
3551   nrecvs   = gen_from->n;
3552   nsends   = gen_to->n;
3553   rwaits   = gen_from->requests;
3554   swaits   = gen_to->requests;
3555   srow     = gen_to->indices;   /* local row index to be sent */
3556   rstarts  = gen_from->starts;
3557   sstarts  = gen_to->starts;
3558   rprocs   = gen_from->procs;
3559   sprocs   = gen_to->procs;
3560   sstatus  = gen_to->sstatus;
3561 
3562   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3563   if (scall == MAT_INITIAL_MATRIX){
3564     /* i-array */
3565     /*---------*/
3566     /*  post receives */
3567     for (i=0; i<nrecvs; i++){
3568       rowlen = (PetscInt*)rvalues + rstarts[i];
3569       nrows = rstarts[i+1]-rstarts[i];
3570       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3571     }
3572 
3573     /* pack the outgoing message */
3574     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3575     rstartsj = sstartsj + nsends +1;
3576     sstartsj[0] = 0;  rstartsj[0] = 0;
3577     len = 0; /* total length of j or a array to be sent */
3578     k = 0;
3579     for (i=0; i<nsends; i++){
3580       rowlen = (PetscInt*)svalues + sstarts[i];
3581       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3582       for (j=0; j<nrows; j++) {
3583         row = srow[k] + b->rowners[rank]; /* global row idx */
3584         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3585         len += rowlen[j];
3586         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3587         k++;
3588       }
3589       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3590       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3591     }
3592     /* recvs and sends of i-array are completed */
3593     i = nrecvs;
3594     while (i--) {
3595       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3596     }
3597     if (nsends) {
3598       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3599     }
3600     /* allocate buffers for sending j and a arrays */
3601     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3602     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3603 
3604     /* create i-array of B_oth */
3605     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3606     b_othi[0] = 0;
3607     len = 0; /* total length of j or a array to be received */
3608     k = 0;
3609     for (i=0; i<nrecvs; i++){
3610       rowlen = (PetscInt*)rvalues + rstarts[i];
3611       nrows = rstarts[i+1]-rstarts[i];
3612       for (j=0; j<nrows; j++) {
3613         b_othi[k+1] = b_othi[k] + rowlen[j];
3614         len += rowlen[j]; k++;
3615       }
3616       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3617     }
3618 
3619     /* allocate space for the j and a arrays of B_oth */
3620     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3621     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3622 
3623     /* j-array */
3624     /*---------*/
3625     /*  post receives of j-array */
3626     for (i=0; i<nrecvs; i++){
3627       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3628       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3629     }
3630     k = 0;
3631     for (i=0; i<nsends; i++){
3632       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3633       bufJ = bufj+sstartsj[i];
3634       for (j=0; j<nrows; j++) {
3635         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3636         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3637         for (l=0; l<ncols; l++){
3638           *bufJ++ = cols[l];
3639         }
3640         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3641       }
3642       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3643     }
3644 
3645     /* recvs and sends of j-array are completed */
3646     i = nrecvs;
3647     while (i--) {
3648       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3649     }
3650     if (nsends) {
3651       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3652     }
3653   } else if (scall == MAT_REUSE_MATRIX){
3654     sstartsj = *startsj;
3655     rstartsj = sstartsj + nsends +1;
3656     bufa     = *bufa_ptr;
3657     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3658     b_otha   = b_oth->a;
3659   } else {
3660     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3661   }
3662 
3663   /* a-array */
3664   /*---------*/
3665   /*  post receives of a-array */
3666   for (i=0; i<nrecvs; i++){
3667     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3668     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3669   }
3670   k = 0;
3671   for (i=0; i<nsends; i++){
3672     nrows = sstarts[i+1]-sstarts[i];
3673     bufA = bufa+sstartsj[i];
3674     for (j=0; j<nrows; j++) {
3675       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3676       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3677       for (l=0; l<ncols; l++){
3678         *bufA++ = vals[l];
3679       }
3680       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3681 
3682     }
3683     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3684   }
3685   /* recvs and sends of a-array are completed */
3686   i = nrecvs;
3687   while (i--) {
3688     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3689   }
3690   if (nsends) {
3691     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3692   }
3693 
3694   if (scall == MAT_INITIAL_MATRIX){
3695     /* put together the new matrix */
3696     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3697 
3698     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3699     /* Since these are PETSc arrays, change flags to free them as necessary. */
3700     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3701     b_oth->freedata = PETSC_TRUE;
3702     b_oth->nonew    = 0;
3703 
3704     ierr = PetscFree(bufj);CHKERRQ(ierr);
3705     if (!startsj || !bufa_ptr){
3706       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3707       ierr = PetscFree(bufa);CHKERRQ(ierr);
3708     } else {
3709       *startsj  = sstartsj;
3710       *bufa_ptr = bufa;
3711     }
3712   }
3713   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3714 
3715   PetscFunctionReturn(0);
3716 }
3717 
3718 /*MC
3719    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3720 
3721    Options Database Keys:
3722 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3723 
3724   Level: beginner
3725 
3726 .seealso: MatCreateMPIAIJ
3727 M*/
3728 
3729 EXTERN_C_BEGIN
3730 #undef __FUNCT__
3731 #define __FUNCT__ "MatCreate_MPIAIJ"
3732 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3733 {
3734   Mat_MPIAIJ     *b;
3735   PetscErrorCode ierr;
3736   PetscInt       i;
3737   PetscMPIInt    size;
3738 
3739   PetscFunctionBegin;
3740   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3741 
3742   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3743   B->data         = (void*)b;
3744   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3745   B->factor       = 0;
3746   B->bs           = 1;
3747   B->assembled    = PETSC_FALSE;
3748   B->mapping      = 0;
3749 
3750   B->insertmode      = NOT_SET_VALUES;
3751   b->size            = size;
3752   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3753 
3754   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3755   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3756 
3757   /* the information in the maps duplicates the information computed below, eventually
3758      we should remove the duplicate information that is not contained in the maps */
3759   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3760   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3761 
3762   /* build local table of row and column ownerships */
3763   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3764   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3765   b->cowners = b->rowners + b->size + 2;
3766   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3767   b->rowners[0] = 0;
3768   for (i=2; i<=b->size; i++) {
3769     b->rowners[i] += b->rowners[i-1];
3770   }
3771   b->rstart = b->rowners[b->rank];
3772   b->rend   = b->rowners[b->rank+1];
3773   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3774   b->cowners[0] = 0;
3775   for (i=2; i<=b->size; i++) {
3776     b->cowners[i] += b->cowners[i-1];
3777   }
3778   b->cstart = b->cowners[b->rank];
3779   b->cend   = b->cowners[b->rank+1];
3780 
3781   /* build cache for off array entries formed */
3782   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3783   b->donotstash  = PETSC_FALSE;
3784   b->colmap      = 0;
3785   b->garray      = 0;
3786   b->roworiented = PETSC_TRUE;
3787 
3788   /* stuff used for matrix vector multiply */
3789   b->lvec      = PETSC_NULL;
3790   b->Mvctx     = PETSC_NULL;
3791 
3792   /* stuff for MatGetRow() */
3793   b->rowindices   = 0;
3794   b->rowvalues    = 0;
3795   b->getrowactive = PETSC_FALSE;
3796 
3797   /* Explicitly create 2 MATSEQAIJ matrices. */
3798   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3799   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3800   PetscLogObjectParent(B,b->A);
3801   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3802   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3803   PetscLogObjectParent(B,b->B);
3804 
3805   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3806                                      "MatStoreValues_MPIAIJ",
3807                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3808   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3809                                      "MatRetrieveValues_MPIAIJ",
3810                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3811   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3812                                      "MatGetDiagonalBlock_MPIAIJ",
3813                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3814   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3815                                      "MatIsTranspose_MPIAIJ",
3816                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3817   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3818                                      "MatMPIAIJSetPreallocation_MPIAIJ",
3819                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3820   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3821                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3822                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3823   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3824                                      "MatDiagonalScaleLocal_MPIAIJ",
3825                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3826   PetscFunctionReturn(0);
3827 }
3828 EXTERN_C_END
3829