xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 69a612a9be7fbcc0ab27d96ce41c4e3795ac7f2a)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
9 a slightly higher hash-table cost; without it, it is not scalable (each processor
10 has an order N integer array) but is fast to access.
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
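/*
   Illustration only (not part of the library source): a minimal sketch, under the
   assumptions of this file, of how the colmap built above is consulted to translate
   a global column number gcol into a local column number of the off-diagonal block B.
   The names aij, gcol and lcol are hypothetical; the pattern mirrors the lookups in
   MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below.

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;                          (the table stores indices shifted up by one)
   #else
     lcol = aij->colmap[gcol] - 1;    (a stored value of 0 means the column is not local)
   #endif
     if (lcol < 0) {
       ... gcol is not a column of the local off-diagonal block ...
     }
*/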
34 
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (PetscInt*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (PetscInt*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar    value;
190   PetscErrorCode ierr;
191   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
192   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth     roworiented = aij->roworiented;
194 
195   /* Some variables required by the macros above */
196   Mat            A = aij->A;
197   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
198   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar    *aa = a->a;
200   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat            B = aij->B;
202   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
203   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar    *ba = b->a;
205 
206   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   PetscInt       nonew = a->nonew,shift=0;
208   PetscScalar    *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
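/*
   Illustration only: a minimal usage sketch, assuming a matrix already created as
   MATMPIAIJ, showing how values set by MatSetValues() reach this routine.  Rows owned
   by this process go directly into the diagonal (aij->A) or off-diagonal (aij->B)
   sequential matrices; rows owned by other processes are placed in mat->stash and are
   communicated during assembly.  The names A, comm, M, N, row, col and val are
   hypothetical.

     Mat         A;
     PetscInt    row = 0,col = 0;
     PetscScalar val = 1.0;

     ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

     ierr = MatSetValues(A,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);  (row may be off-process)

     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);  (sends the stash)
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    (receives and inserts it)
*/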
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
278   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   PetscInt       nstash,reallocs;
322   InsertMode     addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
348   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
349   PetscErrorCode ierr;
350   PetscMPIInt    n;
351   PetscInt       i,j,rstart,ncols,flg;
352   PetscInt       *row,*col,other_disassembled;
353   PetscScalar    *val;
354   InsertMode     addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for (i=0; i<n;) {
363         /* Now identify the consecutive vals belonging to the same row */
364         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
373   }
374 
375   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled; if so we must
379      also disassemble ourselves, in order that we may reassemble. */
380   /*
381      if the nonzero structure of the submatrix B cannot change, then we know that
382      no processor disassembled, and thus we can skip this step
383   */
384   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
388       /* re-access b because aij->B was changed by DisAssemble_MPIAIJ() */
389       b    = (Mat_SeqAIJ *)aij->B->data;
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403 
404   /* used by MatAXPY() */
405   a->xtoy = 0; b->xtoy = 0;
406   a->XtoY = 0; b->XtoY = 0;
407 
408   PetscFunctionReturn(0);
409 }
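/*
   Illustration only: a minimal sketch, assuming a MATMPIAIJ matrix A, of how the
   assembly routines above are used.  MatAssemblyEnd_MPIAIJ() receives the stashed
   off-process entries, reapplies them with MatSetValues_MPIAIJ(), and on the final
   assembly builds the communication context via MatSetUpMultiply_MPIAIJ().  A flush
   assembly is required between an INSERT_VALUES phase and an ADD_VALUES phase.

     ... set some entries with INSERT_VALUES ...
     ierr = MatAssemblyBegin(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
     ... set some entries with ADD_VALUES ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/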
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
413 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
414 {
415   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
416   PetscErrorCode ierr;
417 
418   PetscFunctionBegin;
419   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
420   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatZeroRows_MPIAIJ"
426 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
427 {
428   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
429   PetscErrorCode ierr;
430   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
431   PetscInt       i,N,*rows,*owners = l->rowners;
432   PetscInt       *nprocs,j,idx,nsends,row;
433   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
434   PetscInt       *rvalues,count,base,slen,*source;
435   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
436   MPI_Comm       comm = A->comm;
437   MPI_Request    *send_waits,*recv_waits;
438   MPI_Status     recv_status,*send_status;
439   IS             istmp;
440   PetscTruth     found;
441 
442   PetscFunctionBegin;
443   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
444   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
445 
446   /*  first count number of contributors to each processor */
447   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
448   ierr   = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
450   for (i=0; i<N; i++) {
451     idx = rows[i];
452     found = PETSC_FALSE;
453     for (j=0; j<size; j++) {
454       if (idx >= owners[j] && idx < owners[j+1]) {
455         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
456       }
457     }
458     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
459   }
460   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
461 
462   /* inform other processors of number of messages and max length*/
463   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
464 
465   /* post receives:   */
466   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
467   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
468   for (i=0; i<nrecvs; i++) {
469     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
470   }
471 
472   /* do sends:
473       1) starts[i] gives the starting index in svalues for stuff going to
474          the ith processor
475   */
476   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
477   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
478   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
479   starts[0] = 0;
480   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
481   for (i=0; i<N; i++) {
482     svalues[starts[owner[i]]++] = rows[i];
483   }
484   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
485 
486   starts[0] = 0;
487   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
488   count = 0;
489   for (i=0; i<size; i++) {
490     if (nprocs[2*i+1]) {
491       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
492     }
493   }
494   ierr = PetscFree(starts);CHKERRQ(ierr);
495 
496   base = owners[rank];
497 
498   /*  wait on receives */
499   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
500   source = lens + nrecvs;
501   count  = nrecvs; slen = 0;
502   while (count) {
503     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
504     /* unpack receives into our local space */
505     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
506     source[imdex]  = recv_status.MPI_SOURCE;
507     lens[imdex]    = n;
508     slen          += n;
509     count--;
510   }
511   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
512 
513   /* move the data into the send scatter */
514   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
515   count = 0;
516   for (i=0; i<nrecvs; i++) {
517     values = rvalues + i*nmax;
518     for (j=0; j<lens[i]; j++) {
519       lrows[count++] = values[j] - base;
520     }
521   }
522   ierr = PetscFree(rvalues);CHKERRQ(ierr);
523   ierr = PetscFree(lens);CHKERRQ(ierr);
524   ierr = PetscFree(owner);CHKERRQ(ierr);
525   ierr = PetscFree(nprocs);CHKERRQ(ierr);
526 
527   /* actually zap the local rows */
528   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
529   PetscLogObjectParent(A,istmp);
530 
531   /*
532         Zero the required rows. If the "diagonal block" of the matrix
533      is square and the user wishes to set the diagonal, we use separate
534      code so that MatSetValues() is not called once per diagonal entry,
535      which would allocate new memory each time, causing many mallocs and slowing things down.
536 
537        Contributed by: Mathew Knepley
538   */
539   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
540   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
541   if (diag && (l->A->M == l->A->N)) {
542     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
543   } else if (diag) {
544     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
545     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
546       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
547 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
548     }
549     for (i = 0; i < slen; i++) {
550       row  = lrows[i] + rstart;
551       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
552     }
553     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555   } else {
556     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
557   }
558   ierr = ISDestroy(istmp);CHKERRQ(ierr);
559   ierr = PetscFree(lrows);CHKERRQ(ierr);
560 
561   /* wait on sends */
562   if (nsends) {
563     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
564     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
565     ierr = PetscFree(send_status);CHKERRQ(ierr);
566   }
567   ierr = PetscFree(send_waits);CHKERRQ(ierr);
568   ierr = PetscFree(svalues);CHKERRQ(ierr);
569 
570   PetscFunctionReturn(0);
571 }
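/*
   Illustration only: a minimal usage sketch, assuming this file's calling sequence
   MatZeroRows(Mat,IS,const PetscScalar*), of zeroing a set of global rows and placing
   a value on the diagonal.  Each process may list any global rows; the routine above
   routes every row index to its owning process before zeroing.  The names A, rows,
   nlocal and one are hypothetical.

     IS          is;
     PetscInt    rows[] = {0,5,7};
     PetscInt    nlocal = 3;
     PetscScalar one = 1.0;

     ierr = ISCreateGeneral(PETSC_COMM_SELF,nlocal,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);   (pass 0 to leave the diagonal unset)
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/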
572 
573 #undef __FUNCT__
574 #define __FUNCT__ "MatMult_MPIAIJ"
575 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
576 {
577   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
578   PetscErrorCode ierr;
579   PetscInt       nt;
580 
581   PetscFunctionBegin;
582   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
583   if (nt != A->n) {
584     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
585   }
586   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
588   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
589   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
590   PetscFunctionReturn(0);
591 }
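/*
   Illustration only: per process, the product computed above can be written as

        y_local = A_diag * x_local + B_off * x_ghost

   where A_diag = a->A holds the columns owned by this process, B_off = a->B holds the
   remaining (off-diagonal) columns compressed via a->garray, and x_ghost = a->lvec is
   filled by the VecScatter a->Mvctx.  The scatter is started before the local multiply
   and finished afterwards so that communication overlaps the computation on a->A.
*/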
592 
593 #undef __FUNCT__
594 #define __FUNCT__ "MatMultAdd_MPIAIJ"
595 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
596 {
597   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
598   PetscErrorCode ierr;
599 
600   PetscFunctionBegin;
601   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
603   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
604   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 #undef __FUNCT__
609 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
610 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
611 {
612   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
613   PetscErrorCode ierr;
614 
615   PetscFunctionBegin;
616   /* do nondiagonal part */
617   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
618   /* send it on its way */
619   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
620   /* do local part */
621   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
622   /* receive remote parts: note this assumes the values are not actually
623      inserted in yy until VecScatterEnd() below, which is true for this
624      implementation but may not always be true. */
625   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
626   PetscFunctionReturn(0);
627 }
628 
629 EXTERN_C_BEGIN
630 #undef __FUNCT__
631 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
632 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
633 {
634   MPI_Comm       comm;
635   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
636   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
637   IS             Me,Notme;
638   PetscErrorCode ierr;
639   PetscInt       M,N,first,last,*notme,i;
640   PetscMPIInt    size;
641 
642   PetscFunctionBegin;
643 
644   /* Easy test: symmetric diagonal block */
645   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
646   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
647   if (!*f) PetscFunctionReturn(0);
648   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
649   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
650   if (size == 1) PetscFunctionReturn(0);
651 
652   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
653   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
654   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
655   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
656   for (i=0; i<first; i++) notme[i] = i;
657   for (i=last; i<M; i++) notme[i-last+first] = i;
658   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
659   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
660   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
661   Aoff = Aoffs[0];
662   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
663   Boff = Boffs[0];
664   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
665   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
666   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
667   ierr = ISDestroy(Me);CHKERRQ(ierr);
668   ierr = ISDestroy(Notme);CHKERRQ(ierr);
669 
670   PetscFunctionReturn(0);
671 }
672 EXTERN_C_END
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
676 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
677 {
678   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
679   PetscErrorCode ierr;
680 
681   PetscFunctionBegin;
682   /* do nondiagonal part */
683   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
684   /* send it on its way */
685   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
686   /* do local part */
687   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
688   /* receive remote parts: note this assumes the values are not actually
689      inserted in zz until VecScatterEnd() below, which is true for this
690      implementation but may not always be true. */
691   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 /*
696   This only works correctly for square matrices where the subblock A->A is the
697    diagonal block
698 */
699 #undef __FUNCT__
700 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
701 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
702 {
703   PetscErrorCode ierr;
704   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
705 
706   PetscFunctionBegin;
707   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
708   if (a->rstart != a->cstart || a->rend != a->cend) {
709     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
710   }
711   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatScale_MPIAIJ"
717 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
718 {
719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
720   PetscErrorCode ierr;
721 
722   PetscFunctionBegin;
723   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
724   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatDestroy_MPIAIJ"
730 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
731 {
732   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
733   PetscErrorCode ierr;
734 
735   PetscFunctionBegin;
736 #if defined(PETSC_USE_LOG)
737   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
738 #endif
739   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
740   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
741   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
742   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
743 #if defined (PETSC_USE_CTABLE)
744   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
745 #else
746   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
747 #endif
748   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
749   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
750   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
751   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
752   ierr = PetscFree(aij);CHKERRQ(ierr);
753 
754   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
755   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
756   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
757   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
758   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
759   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
760   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNCT__
765 #define __FUNCT__ "MatView_MPIAIJ_Binary"
766 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
767 {
768   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
769   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
770   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
771   PetscErrorCode    ierr;
772   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
773   int               fd;
774   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
775   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
776   PetscScalar       *column_values;
777 
778   PetscFunctionBegin;
779   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
780   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
781   nz   = A->nz + B->nz;
782   if (!rank) {
783     header[0] = MAT_FILE_COOKIE;
784     header[1] = mat->M;
785     header[2] = mat->N;
786     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
787     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
788     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
789     /* get largest number of rows any processor has */
790     rlen = mat->m;
791     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
792     for (i=1; i<size; i++) {
793       rlen = PetscMax(rlen,range[i+1] - range[i]);
794     }
795   } else {
796     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
797     rlen = mat->m;
798   }
799 
800   /* load up the local row counts */
801   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
802   for (i=0; i<mat->m; i++) {
803     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
804   }
805 
806   /* store the row lengths to the file */
807   if (!rank) {
808     MPI_Status status;
809     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
810     for (i=1; i<size; i++) {
811       rlen = range[i+1] - range[i];
812       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
813       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
814     }
815   } else {
816     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
817   }
818   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
819 
820   /* load up the local column indices */
821   nzmax = nz; /* the 0th processor needs as much space as the processor with the most nonzeros */
822   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
823   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
824   cnt  = 0;
825   for (i=0; i<mat->m; i++) {
826     for (j=B->i[i]; j<B->i[i+1]; j++) {
827       if ( (col = garray[B->j[j]]) > cstart) break;
828       column_indices[cnt++] = col;
829     }
830     for (k=A->i[i]; k<A->i[i+1]; k++) {
831       column_indices[cnt++] = A->j[k] + cstart;
832     }
833     for (; j<B->i[i+1]; j++) {
834       column_indices[cnt++] = garray[B->j[j]];
835     }
836   }
837   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
838 
839   /* store the column indices to the file */
840   if (!rank) {
841     MPI_Status status;
842     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
843     for (i=1; i<size; i++) {
844       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
845       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
846       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
847       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
848     }
849   } else {
850     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
851     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
852   }
853   ierr = PetscFree(column_indices);CHKERRQ(ierr);
854 
855   /* load up the local column values */
856   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
857   cnt  = 0;
858   for (i=0; i<mat->m; i++) {
859     for (j=B->i[i]; j<B->i[i+1]; j++) {
860       if ( garray[B->j[j]] > cstart) break;
861       column_values[cnt++] = B->a[j];
862     }
863     for (k=A->i[i]; k<A->i[i+1]; k++) {
864       column_values[cnt++] = A->a[k];
865     }
866     for (; j<B->i[i+1]; j++) {
867       column_values[cnt++] = B->a[j];
868     }
869   }
870   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
871 
872   /* store the column values to the file */
873   if (!rank) {
874     MPI_Status status;
875     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
876     for (i=1; i<size; i++) {
877       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
878       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
879       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
880       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
881     }
882   } else {
883     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
884     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
885   }
886   ierr = PetscFree(column_values);CHKERRQ(ierr);
887   PetscFunctionReturn(0);
888 }
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
892 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
893 {
894   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
895   PetscErrorCode    ierr;
896   PetscMPIInt       rank = aij->rank,size = aij->size;
897   PetscTruth        isdraw,iascii,flg,isbinary;
898   PetscViewer       sviewer;
899   PetscViewerFormat format;
900 
901   PetscFunctionBegin;
902   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
903   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
904   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
905   if (iascii) {
906     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
907     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
908       MatInfo info;
909       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
910       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
911       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
912       if (flg) {
913         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
914 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
915       } else {
916         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
917 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
918       }
919       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
920       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
921       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
922       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
923       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
924       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
925       PetscFunctionReturn(0);
926     } else if (format == PETSC_VIEWER_ASCII_INFO) {
927       PetscFunctionReturn(0);
928     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
929       PetscFunctionReturn(0);
930     }
931   } else if (isbinary) {
932     if (size == 1) {
933       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
934       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
935     } else {
936       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
937     }
938     PetscFunctionReturn(0);
939   } else if (isdraw) {
940     PetscDraw  draw;
941     PetscTruth isnull;
942     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
943     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
944   }
945 
946   if (size == 1) {
947     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
948     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
949   } else {
950     /* assemble the entire matrix onto first processor. */
951     Mat         A;
952     Mat_SeqAIJ *Aloc;
953     PetscInt         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
954     PetscScalar *a;
955 
956     if (!rank) {
957       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
958     } else {
959       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
960     }
961     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
962     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
963     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
964     PetscLogObjectParent(mat,A);
965 
966     /* copy over the A part */
967     Aloc = (Mat_SeqAIJ*)aij->A->data;
968     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
969     row = aij->rstart;
970     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
971     for (i=0; i<m; i++) {
972       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
973       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
974     }
975     aj = Aloc->j;
976     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
977 
978     /* copy over the B part */
979     Aloc = (Mat_SeqAIJ*)aij->B->data;
980     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
981     row  = aij->rstart;
982     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
983     ct   = cols;
984     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
985     for (i=0; i<m; i++) {
986       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
987       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
988     }
989     ierr = PetscFree(ct);CHKERRQ(ierr);
990     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
991     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
992     /*
993        Everyone has to call to draw the matrix since the graphics waits are
994        synchronized across all processors that share the PetscDraw object
995     */
996     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
997     if (!rank) {
998       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
999       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1000     }
1001     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1002     ierr = MatDestroy(A);CHKERRQ(ierr);
1003   }
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatView_MPIAIJ"
1009 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1010 {
1011   PetscErrorCode ierr;
1012   PetscTruth     iascii,isdraw,issocket,isbinary;
1013 
1014   PetscFunctionBegin;
1015   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1016   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1017   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1018   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1019   if (iascii || isdraw || isbinary || issocket) {
1020     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1021   } else {
1022     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 
1028 
1029 #undef __FUNCT__
1030 #define __FUNCT__ "MatRelax_MPIAIJ"
1031 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1032 {
1033   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1034   PetscErrorCode ierr;
1035   Vec            bb1;
1036   PetscScalar    mone=-1.0;
1037 
1038   PetscFunctionBegin;
1039   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1040 
1041   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1042 
1043   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1044     if (flag & SOR_ZERO_INITIAL_GUESS) {
1045       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1046       its--;
1047     }
1048 
1049     while (its--) {
1050       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1051       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1052 
1053       /* update rhs: bb1 = bb - B*x */
1054       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1055       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1056 
1057       /* local sweep */
1058       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1059       CHKERRQ(ierr);
1060     }
1061   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1062     if (flag & SOR_ZERO_INITIAL_GUESS) {
1063       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1064       its--;
1065     }
1066     while (its--) {
1067       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1068       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069 
1070       /* update rhs: bb1 = bb - B*x */
1071       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1072       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1073 
1074       /* local sweep */
1075       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1076       CHKERRQ(ierr);
1077     }
1078   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1079     if (flag & SOR_ZERO_INITIAL_GUESS) {
1080       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1081       its--;
1082     }
1083     while (its--) {
1084       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1085       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1086 
1087       /* update rhs: bb1 = bb - B*x */
1088       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1089       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1090 
1091       /* local sweep */
1092       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1093       CHKERRQ(ierr);
1094     }
1095   } else {
1096     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1097   }
1098 
1099   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1100   PetscFunctionReturn(0);
1101 }
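/*
   Illustration only: in each iteration of the local SOR sweeps above, the right-hand
   side handed to the sequential relaxation on the diagonal block is

        bb1 = bb - B_off * x_ghost

   formed by scattering the current solution into mat->lvec, scaling it by -1 with
   VecScale(), and applying the off-diagonal block's multadd.  Only the local block
   mat->A is then relaxed; a true global parallel SOR is not supported.
*/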
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1105 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1106 {
1107   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1108   Mat            A = mat->A,B = mat->B;
1109   PetscErrorCode ierr;
1110   PetscReal      isend[5],irecv[5];
1111 
1112   PetscFunctionBegin;
1113   info->block_size     = 1.0;
1114   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1115   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1116   isend[3] = info->memory;  isend[4] = info->mallocs;
1117   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1118   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1119   isend[3] += info->memory;  isend[4] += info->mallocs;
1120   if (flag == MAT_LOCAL) {
1121     info->nz_used      = isend[0];
1122     info->nz_allocated = isend[1];
1123     info->nz_unneeded  = isend[2];
1124     info->memory       = isend[3];
1125     info->mallocs      = isend[4];
1126   } else if (flag == MAT_GLOBAL_MAX) {
1127     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1128     info->nz_used      = irecv[0];
1129     info->nz_allocated = irecv[1];
1130     info->nz_unneeded  = irecv[2];
1131     info->memory       = irecv[3];
1132     info->mallocs      = irecv[4];
1133   } else if (flag == MAT_GLOBAL_SUM) {
1134     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1135     info->nz_used      = irecv[0];
1136     info->nz_allocated = irecv[1];
1137     info->nz_unneeded  = irecv[2];
1138     info->memory       = irecv[3];
1139     info->mallocs      = irecv[4];
1140   }
1141   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1142   info->fill_ratio_needed = 0;
1143   info->factor_mallocs    = 0;
1144   info->rows_global       = (double)matin->M;
1145   info->columns_global    = (double)matin->N;
1146   info->rows_local        = (double)matin->m;
1147   info->columns_local     = (double)matin->N;
1148 
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatSetOption_MPIAIJ"
1154 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1155 {
1156   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1157   PetscErrorCode ierr;
1158 
1159   PetscFunctionBegin;
1160   switch (op) {
1161   case MAT_NO_NEW_NONZERO_LOCATIONS:
1162   case MAT_YES_NEW_NONZERO_LOCATIONS:
1163   case MAT_COLUMNS_UNSORTED:
1164   case MAT_COLUMNS_SORTED:
1165   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1166   case MAT_KEEP_ZEROED_ROWS:
1167   case MAT_NEW_NONZERO_LOCATION_ERR:
1168   case MAT_USE_INODES:
1169   case MAT_DO_NOT_USE_INODES:
1170   case MAT_IGNORE_ZERO_ENTRIES:
1171     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1172     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1173     break;
1174   case MAT_ROW_ORIENTED:
1175     a->roworiented = PETSC_TRUE;
1176     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1177     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1178     break;
1179   case MAT_ROWS_SORTED:
1180   case MAT_ROWS_UNSORTED:
1181   case MAT_YES_NEW_DIAGONALS:
1182     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1183     break;
1184   case MAT_COLUMN_ORIENTED:
1185     a->roworiented = PETSC_FALSE;
1186     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1187     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1188     break;
1189   case MAT_IGNORE_OFF_PROC_ENTRIES:
1190     a->donotstash = PETSC_TRUE;
1191     break;
1192   case MAT_NO_NEW_DIAGONALS:
1193     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1194   case MAT_SYMMETRIC:
1195   case MAT_STRUCTURALLY_SYMMETRIC:
1196   case MAT_HERMITIAN:
1197   case MAT_SYMMETRY_ETERNAL:
1198     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1199     break;
1200   case MAT_NOT_SYMMETRIC:
1201   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1202   case MAT_NOT_HERMITIAN:
1203   case MAT_NOT_SYMMETRY_ETERNAL:
1204     break;
1205   default:
1206     SETERRQ(PETSC_ERR_SUP,"unknown option");
1207   }
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatGetRow_MPIAIJ"
1213 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1214 {
1215   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1216   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1217   PetscErrorCode ierr;
1218   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1219   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1220   PetscInt       *cmap,*idx_p;
1221 
1222   PetscFunctionBegin;
1223   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1224   mat->getrowactive = PETSC_TRUE;
1225 
1226   if (!mat->rowvalues && (idx || v)) {
1227     /*
1228         allocate enough space to hold information from the longest row.
1229     */
1230     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1231     PetscInt     max = 1,tmp;
1232     for (i=0; i<matin->m; i++) {
1233       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1234       if (max < tmp) { max = tmp; }
1235     }
1236     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1237     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1238   }
1239 
1240   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1241   lrow = row - rstart;
1242 
1243   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1244   if (!v)   {pvA = 0; pvB = 0;}
1245   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1246   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1247   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1248   nztot = nzA + nzB;
1249 
1250   cmap  = mat->garray;
1251   if (v  || idx) {
1252     if (nztot) {
1253       /* Sort by increasing column numbers, assuming A and B already sorted */
1254       PetscInt imark = -1;
1255       if (v) {
1256         *v = v_p = mat->rowvalues;
1257         for (i=0; i<nzB; i++) {
1258           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1259           else break;
1260         }
1261         imark = i;
1262         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1263         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1264       }
1265       if (idx) {
1266         *idx = idx_p = mat->rowindices;
1267         if (imark > -1) {
1268           for (i=0; i<imark; i++) {
1269             idx_p[i] = cmap[cworkB[i]];
1270           }
1271         } else {
1272           for (i=0; i<nzB; i++) {
1273             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1274             else break;
1275           }
1276           imark = i;
1277         }
1278         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1279         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1280       }
1281     } else {
1282       if (idx) *idx = 0;
1283       if (v)   *v   = 0;
1284     }
1285   }
1286   *nz = nztot;
1287   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1288   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
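/*
   Illustration only: a minimal usage sketch of the MatGetRow()/MatRestoreRow() pair
   implemented above, assuming a global row index "grow" owned by this process (the
   name is hypothetical).  Only locally owned rows may be requested, and the row must
   be restored before another row can be fetched.

     PetscInt    ncols,*cols;
     PetscScalar *vals;

     ierr = MatGetRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
     ... read the ncols column indices in cols[] and values in vals[] ...
     ierr = MatRestoreRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
*/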
1291 
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1294 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1295 {
1296   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1297 
1298   PetscFunctionBegin;
1299   if (aij->getrowactive == PETSC_FALSE) {
1300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1301   }
1302   aij->getrowactive = PETSC_FALSE;
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatNorm_MPIAIJ"
1308 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1309 {
1310   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1311   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1312   PetscErrorCode ierr;
1313   PetscInt       i,j,cstart = aij->cstart;
1314   PetscReal      sum = 0.0;
1315   PetscScalar    *v;
1316 
1317   PetscFunctionBegin;
1318   if (aij->size == 1) {
1319     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1320   } else {
1321     if (type == NORM_FROBENIUS) {
1322       v = amat->a;
1323       for (i=0; i<amat->nz; i++) {
1324 #if defined(PETSC_USE_COMPLEX)
1325         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1326 #else
1327         sum += (*v)*(*v); v++;
1328 #endif
1329       }
1330       v = bmat->a;
1331       for (i=0; i<bmat->nz; i++) {
1332 #if defined(PETSC_USE_COMPLEX)
1333         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1334 #else
1335         sum += (*v)*(*v); v++;
1336 #endif
1337       }
1338       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1339       *norm = sqrt(*norm);
1340     } else if (type == NORM_1) { /* max column norm */
1341       PetscReal *tmp,*tmp2;
1342       PetscInt    *jj,*garray = aij->garray;
1343       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1344       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1345       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1346       *norm = 0.0;
1347       v = amat->a; jj = amat->j;
1348       for (j=0; j<amat->nz; j++) {
1349         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1350       }
1351       v = bmat->a; jj = bmat->j;
1352       for (j=0; j<bmat->nz; j++) {
1353         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1354       }
1355       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1356       for (j=0; j<mat->N; j++) {
1357         if (tmp2[j] > *norm) *norm = tmp2[j];
1358       }
1359       ierr = PetscFree(tmp);CHKERRQ(ierr);
1360       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1361     } else if (type == NORM_INFINITY) { /* max row norm */
1362       PetscReal ntemp = 0.0;
1363       for (j=0; j<aij->A->m; j++) {
1364         v = amat->a + amat->i[j];
1365         sum = 0.0;
1366         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1367           sum += PetscAbsScalar(*v); v++;
1368         }
1369         v = bmat->a + bmat->i[j];
1370         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1371           sum += PetscAbsScalar(*v); v++;
1372         }
1373         if (sum > ntemp) ntemp = sum;
1374       }
1375       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1376     } else {
1377       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1378     }
1379   }
1380   PetscFunctionReturn(0);
1381 }
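/*
   Editor-added usage sketch of the norms computed above: NORM_FROBENIUS is
   sqrt(sum_ij |a_ij|^2) over all entries of the parallel matrix, NORM_1 is the maximum
   absolute column sum, and NORM_INFINITY is the maximum absolute row sum; the 2-norm is
   not supported for this type.  The matrix name is illustrative.

     PetscReal nrm;
     ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(mat,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/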
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatTranspose_MPIAIJ"
1385 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1386 {
1387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1388   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1389   PetscErrorCode ierr;
1390   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1391   Mat            B;
1392   PetscScalar    *array;
1393 
1394   PetscFunctionBegin;
1395   if (!matout && M != N) {
1396     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1397   }
1398 
1399   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1400   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1401   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1402 
1403   /* copy over the A part */
1404   Aloc = (Mat_SeqAIJ*)a->A->data;
1405   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1406   row = a->rstart;
1407   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1408   for (i=0; i<m; i++) {
1409     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1410     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1411   }
1412   aj = Aloc->j;
1413   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1414 
1415   /* copy over the B part */
1416   Aloc = (Mat_SeqAIJ*)a->B->data;
1417   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1418   row  = a->rstart;
1419   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1420   ct   = cols;
1421   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1422   for (i=0; i<m; i++) {
1423     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1424     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1425   }
1426   ierr = PetscFree(ct);CHKERRQ(ierr);
1427   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1428   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1429   if (matout) {
1430     *matout = B;
1431   } else {
1432     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1433   }
1434   PetscFunctionReturn(0);
1435 }
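/*
   Editor-added usage sketch, assuming the public MatTranspose() dispatches to the routine
   above with the same arguments: pass the address of a Mat to get the transpose as a new
   matrix, or PETSC_NULL to transpose in place (which, as checked above, requires a square
   matrix).

     Mat At;
     ierr = MatTranspose(A,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);
*/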
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1439 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1440 {
1441   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1442   Mat            a = aij->A,b = aij->B;
1443   PetscErrorCode ierr;
1444   PetscInt       s1,s2,s3;
1445 
1446   PetscFunctionBegin;
1447   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1448   if (rr) {
1449     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1450     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1451     /* Overlap communication with computation. */
1452     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1453   }
1454   if (ll) {
1455     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1456     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1457     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1458   }
1459   /* scale  the diagonal block */
1460   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1461 
1462   if (rr) {
1463     /* Do a scatter end and then right scale the off-diagonal block */
1464     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1465     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1466   }
1467 
1468   PetscFunctionReturn(0);
1469 }
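/*
   Editor-added usage sketch, assuming the public MatDiagonalScale() forwards to the
   routine above: replaces mat by diag(ll)*mat*diag(rr).  Either vector may be PETSC_NULL
   to skip that side; ll must match the local row layout and rr the local column layout,
   as checked above.  The scatter of rr is started before the diagonal block is scaled so
   that communication overlaps with computation.

     ierr = MatDiagonalScale(mat,ll,rr);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,PETSC_NULL,rr);CHKERRQ(ierr);
*/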
1470 
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1474 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1475 {
1476   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1477   PetscErrorCode ierr;
1478 
1479   PetscFunctionBegin;
1480   if (!a->rank) {
1481     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1482   }
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1488 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1489 {
1490   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1491   PetscErrorCode ierr;
1492 
1493   PetscFunctionBegin;
1494   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1495   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1496   PetscFunctionReturn(0);
1497 }
1498 #undef __FUNCT__
1499 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1500 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1501 {
1502   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1503   PetscErrorCode ierr;
1504 
1505   PetscFunctionBegin;
1506   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1507   PetscFunctionReturn(0);
1508 }
1509 
1510 #undef __FUNCT__
1511 #define __FUNCT__ "MatEqual_MPIAIJ"
1512 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1513 {
1514   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1515   Mat            a,b,c,d;
1516   PetscTruth     flg;
1517   PetscErrorCode ierr;
1518 
1519   PetscFunctionBegin;
1520   a = matA->A; b = matA->B;
1521   c = matB->A; d = matB->B;
1522 
1523   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1524   if (flg == PETSC_TRUE) {
1525     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1526   }
1527   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatCopy_MPIAIJ"
1533 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1534 {
1535   PetscErrorCode ierr;
1536   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1537   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1538 
1539   PetscFunctionBegin;
1540   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1541   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1542     /* because of the column compression in the off-processor part of the matrix a->B,
1543        the number of columns in a->B and b->B may be different, hence we cannot call
1544        MatCopy() directly on the two parts. If need be, a copy more efficient than
1545        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1546        and then copying the submatrices */
1547     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1548   } else {
1549     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1550     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1551   }
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1557 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1558 {
1559   PetscErrorCode ierr;
1560 
1561   PetscFunctionBegin;
1562   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 #include "petscblaslapack.h"
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatAXPY_MPIAIJ"
1569 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1570 {
1571   PetscErrorCode ierr;
1572   PetscInt       i;
1573   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1574   PetscBLASInt   bnz,one=1;
1575   Mat_SeqAIJ     *x,*y;
1576 
1577   PetscFunctionBegin;
1578   if (str == SAME_NONZERO_PATTERN) {
1579     x = (Mat_SeqAIJ *)xx->A->data;
1580     y = (Mat_SeqAIJ *)yy->A->data;
1581     bnz = (PetscBLASInt)x->nz;
1582     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1583     x = (Mat_SeqAIJ *)xx->B->data;
1584     y = (Mat_SeqAIJ *)yy->B->data;
1585     bnz = (PetscBLASInt)x->nz;
1586     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1587   } else if (str == SUBSET_NONZERO_PATTERN) {
1588     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1589 
1590     x = (Mat_SeqAIJ *)xx->B->data;
1591     y = (Mat_SeqAIJ *)yy->B->data;
1592     if (y->xtoy && y->XtoY != xx->B) {
1593       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1594       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1595     }
1596     if (!y->xtoy) { /* get xtoy */
1597       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1598       y->XtoY = xx->B;
1599     }
1600     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1601   } else {
1602     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1603   }
1604   PetscFunctionReturn(0);
1605 }
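/*
   Editor-added usage sketch; the argument order here is an assumption taken from
   MatAXPY_MPIAIJ() above.  The call computes Y = a*X + Y: SAME_NONZERO_PATTERN applies a
   BLAS axpy directly to the stored values, SUBSET_NONZERO_PATTERN builds and caches the
   xtoy index map from X into Y, and anything else falls back to MatAXPY_Basic().

     PetscScalar alpha = 2.0;
     ierr = MatAXPY(&alpha,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/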
1606 
1607 /* -------------------------------------------------------------------*/
1608 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1609        MatGetRow_MPIAIJ,
1610        MatRestoreRow_MPIAIJ,
1611        MatMult_MPIAIJ,
1612 /* 4*/ MatMultAdd_MPIAIJ,
1613        MatMultTranspose_MPIAIJ,
1614        MatMultTransposeAdd_MPIAIJ,
1615        0,
1616        0,
1617        0,
1618 /*10*/ 0,
1619        0,
1620        0,
1621        MatRelax_MPIAIJ,
1622        MatTranspose_MPIAIJ,
1623 /*15*/ MatGetInfo_MPIAIJ,
1624        MatEqual_MPIAIJ,
1625        MatGetDiagonal_MPIAIJ,
1626        MatDiagonalScale_MPIAIJ,
1627        MatNorm_MPIAIJ,
1628 /*20*/ MatAssemblyBegin_MPIAIJ,
1629        MatAssemblyEnd_MPIAIJ,
1630        0,
1631        MatSetOption_MPIAIJ,
1632        MatZeroEntries_MPIAIJ,
1633 /*25*/ MatZeroRows_MPIAIJ,
1634 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_64BIT_INT)
1635        MatLUFactorSymbolic_MPIAIJ_TFS,
1636 #else
1637        0,
1638 #endif
1639        0,
1640        0,
1641        0,
1642 /*30*/ MatSetUpPreallocation_MPIAIJ,
1643        0,
1644        0,
1645        0,
1646        0,
1647 /*35*/ MatDuplicate_MPIAIJ,
1648        0,
1649        0,
1650        0,
1651        0,
1652 /*40*/ MatAXPY_MPIAIJ,
1653        MatGetSubMatrices_MPIAIJ,
1654        MatIncreaseOverlap_MPIAIJ,
1655        MatGetValues_MPIAIJ,
1656        MatCopy_MPIAIJ,
1657 /*45*/ MatPrintHelp_MPIAIJ,
1658        MatScale_MPIAIJ,
1659        0,
1660        0,
1661        0,
1662 /*50*/ MatSetBlockSize_MPIAIJ,
1663        0,
1664        0,
1665        0,
1666        0,
1667 /*55*/ MatFDColoringCreate_MPIAIJ,
1668        0,
1669        MatSetUnfactored_MPIAIJ,
1670        0,
1671        0,
1672 /*60*/ MatGetSubMatrix_MPIAIJ,
1673        MatDestroy_MPIAIJ,
1674        MatView_MPIAIJ,
1675        MatGetPetscMaps_Petsc,
1676        0,
1677 /*65*/ 0,
1678        0,
1679        0,
1680        0,
1681        0,
1682 /*70*/ 0,
1683        0,
1684        MatSetColoring_MPIAIJ,
1685        MatSetValuesAdic_MPIAIJ,
1686        MatSetValuesAdifor_MPIAIJ,
1687 /*75*/ 0,
1688        0,
1689        0,
1690        0,
1691        0,
1692 /*80*/ 0,
1693        0,
1694        0,
1695        0,
1696 /*84*/ MatLoad_MPIAIJ,
1697        0,
1698        0,
1699        0,
1700        0,
1701        0,
1702 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1703        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1704        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1705        MatPtAP_MPIAIJ_MPIAIJ,
1706        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1707 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1708        0,
1709        0,
1710        0};
1711 
1712 /* ----------------------------------------------------------------------------------------*/
1713 
1714 EXTERN_C_BEGIN
1715 #undef __FUNCT__
1716 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1717 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1718 {
1719   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1720   PetscErrorCode ierr;
1721 
1722   PetscFunctionBegin;
1723   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1724   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1725   PetscFunctionReturn(0);
1726 }
1727 EXTERN_C_END
1728 
1729 EXTERN_C_BEGIN
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1732 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1733 {
1734   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1735   PetscErrorCode ierr;
1736 
1737   PetscFunctionBegin;
1738   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1739   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1740   PetscFunctionReturn(0);
1741 }
1742 EXTERN_C_END
1743 
1744 #include "petscpc.h"
1745 EXTERN_C_BEGIN
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1748 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1749 {
1750   Mat_MPIAIJ     *b;
1751   PetscErrorCode ierr;
1752   PetscInt       i;
1753 
1754   PetscFunctionBegin;
1755   B->preallocated = PETSC_TRUE;
1756   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1757   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1758   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1759   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1760   if (d_nnz) {
1761     for (i=0; i<B->m; i++) {
1762       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1763     }
1764   }
1765   if (o_nnz) {
1766     for (i=0; i<B->m; i++) {
1767       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1768     }
1769   }
1770   b = (Mat_MPIAIJ*)B->data;
1771   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1772   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1773 
1774   PetscFunctionReturn(0);
1775 }
1776 EXTERN_C_END
1777 
1778 /*MC
1779    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1780 
1781    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1782    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1783    MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1784    for communicators controlling multiple processes.  It is recommended that you call both of
1785    the above preallocation routines for simplicity.
1786 
1787    Options Database Keys:
1788 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1789 
1790   Level: beginner
1791 
1792 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1793 M*/
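/*
   Editor-added creation sketch for MATAIJ; the sizes and nonzero estimates are
   illustrative.  Calling both preallocation routines, as recommended above, covers both
   the single-process (MATSEQAIJ) and multi-process (MATMPIAIJ) cases.

     Mat A;
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/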
1794 
1795 EXTERN_C_BEGIN
1796 #undef __FUNCT__
1797 #define __FUNCT__ "MatCreate_AIJ"
1798 PetscErrorCode MatCreate_AIJ(Mat A)
1799 {
1800   PetscErrorCode ierr;
1801   PetscMPIInt    size;
1802 
1803   PetscFunctionBegin;
1804   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1805   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1806   if (size == 1) {
1807     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1808   } else {
1809     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1810   }
1811   PetscFunctionReturn(0);
1812 }
1813 EXTERN_C_END
1814 
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1817 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1818 {
1819   Mat            mat;
1820   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1821   PetscErrorCode ierr;
1822 
1823   PetscFunctionBegin;
1824   *newmat       = 0;
1825   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1826   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1827   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1828   a    = (Mat_MPIAIJ*)mat->data;
1829 
1830   mat->factor       = matin->factor;
1831   mat->bs           = matin->bs;
1832   mat->assembled    = PETSC_TRUE;
1833   mat->insertmode   = NOT_SET_VALUES;
1834   mat->preallocated = PETSC_TRUE;
1835 
1836   a->rstart       = oldmat->rstart;
1837   a->rend         = oldmat->rend;
1838   a->cstart       = oldmat->cstart;
1839   a->cend         = oldmat->cend;
1840   a->size         = oldmat->size;
1841   a->rank         = oldmat->rank;
1842   a->donotstash   = oldmat->donotstash;
1843   a->roworiented  = oldmat->roworiented;
1844   a->rowindices   = 0;
1845   a->rowvalues    = 0;
1846   a->getrowactive = PETSC_FALSE;
1847 
1848   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1849   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1850   if (oldmat->colmap) {
1851 #if defined (PETSC_USE_CTABLE)
1852     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1853 #else
1854     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1855     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1856     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1857 #endif
1858   } else a->colmap = 0;
1859   if (oldmat->garray) {
1860     PetscInt len;
1861     len  = oldmat->B->n;
1862     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1863     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1864     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1865   } else a->garray = 0;
1866 
1867   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1868   PetscLogObjectParent(mat,a->lvec);
1869   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1870   PetscLogObjectParent(mat,a->Mvctx);
1871   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1872   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1873   PetscLogObjectParent(mat,a->A);
1874   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1875   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1876   PetscLogObjectParent(mat,a->B);
1877   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1878   *newmat = mat;
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 #include "petscsys.h"
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatLoad_MPIAIJ"
1886 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1887 {
1888   Mat            A;
1889   PetscScalar    *vals,*svals;
1890   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1891   MPI_Status     status;
1892   PetscErrorCode ierr;
1893   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*rowners,maxnz,mm;
1894   PetscInt       i,nz,j,rstart,rend;
1895   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1896   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1897   PetscInt       cend,cstart,n;
1898   int            fd;
1899 
1900   PetscFunctionBegin;
1901   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1902   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1903   if (!rank) {
1904     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1905     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1906     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1907     if (header[3] < 0) {
1908       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1909     }
1910   }
1911 
1912   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1913   M = header[1]; N = header[2];
1914   /* determine ownership of all rows */
1915   m = M/size + ((M % size) > rank);
1916   mm = (PetscMPIInt) m;
1917   ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1918   ierr = MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1919   rowners[0] = 0;
1920   for (i=2; i<=size; i++) {
1921     rowners[i] += rowners[i-1];
1922   }
1923   rstart = rowners[rank];
1924   rend   = rowners[rank+1];
1925 
1926   /* distribute row lengths to all processors */
1927   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1928   offlens = ourlens + (rend-rstart);
1929   if (!rank) {
1930     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1931     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1932     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1933     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1934     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1935     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1936   } else {
1937     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1938   }
1939 
1940   if (!rank) {
1941     /* calculate the number of nonzeros on each processor */
1942     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1943     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1944     for (i=0; i<size; i++) {
1945       for (j=rowners[i]; j< rowners[i+1]; j++) {
1946         procsnz[i] += rowlengths[j];
1947       }
1948     }
1949     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1950 
1951     /* determine max buffer needed and allocate it */
1952     maxnz = 0;
1953     for (i=0; i<size; i++) {
1954       maxnz = PetscMax(maxnz,procsnz[i]);
1955     }
1956     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1957 
1958     /* read in my part of the matrix column indices  */
1959     nz   = procsnz[0];
1960     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1961     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1962 
1963     /* read in everyone else's part and ship it off */
1964     for (i=1; i<size; i++) {
1965       nz   = procsnz[i];
1966       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1967       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1968     }
1969     ierr = PetscFree(cols);CHKERRQ(ierr);
1970   } else {
1971     /* determine buffer space needed for message */
1972     nz = 0;
1973     for (i=0; i<m; i++) {
1974       nz += ourlens[i];
1975     }
1976     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1977 
1978     /* receive message of column indices*/
1979     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1980     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1981     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1982   }
1983 
1984   /* determine column ownership if matrix is not square */
1985   if (N != M) {
1986     n      = N/size + ((N % size) > rank);
1987     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1988     cstart = cend - n;
1989   } else {
1990     cstart = rstart;
1991     cend   = rend;
1992     n      = cend - cstart;
1993   }
1994 
1995   /* loop over local rows, determining number of off diagonal entries */
1996   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1997   jj = 0;
1998   for (i=0; i<m; i++) {
1999     for (j=0; j<ourlens[i]; j++) {
2000       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2001       jj++;
2002     }
2003   }
2004 
2005   /* create our matrix */
2006   for (i=0; i<m; i++) {
2007     ourlens[i] -= offlens[i];
2008   }
2009   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2010   ierr = MatSetType(A,type);CHKERRQ(ierr);
2011   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2012 
2013   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2014   for (i=0; i<m; i++) {
2015     ourlens[i] += offlens[i];
2016   }
2017 
2018   if (!rank) {
2019     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2020 
2021     /* read in my part of the matrix numerical values  */
2022     nz   = procsnz[0];
2023     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2024 
2025     /* insert into matrix */
2026     jj      = rstart;
2027     smycols = mycols;
2028     svals   = vals;
2029     for (i=0; i<m; i++) {
2030       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2031       smycols += ourlens[i];
2032       svals   += ourlens[i];
2033       jj++;
2034     }
2035 
2036     /* read in other processors and ship out */
2037     for (i=1; i<size; i++) {
2038       nz   = procsnz[i];
2039       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2040       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2041     }
2042     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2043   } else {
2044     /* receive numeric values */
2045     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2046 
2047     /* receive message of values*/
2048     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2049     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2050     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2051 
2052     /* insert into matrix */
2053     jj      = rstart;
2054     smycols = mycols;
2055     svals   = vals;
2056     for (i=0; i<m; i++) {
2057       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2058       smycols += ourlens[i];
2059       svals   += ourlens[i];
2060       jj++;
2061     }
2062   }
2063   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2064   ierr = PetscFree(vals);CHKERRQ(ierr);
2065   ierr = PetscFree(mycols);CHKERRQ(ierr);
2066   ierr = PetscFree(rowners);CHKERRQ(ierr);
2067 
2068   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   *newmat = A;
2071   PetscFunctionReturn(0);
2072 }
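/*
   Editor-added loading sketch; the file name is illustrative and the PETSC_FILE_RDONLY
   mode is an assumption chosen to match the PETSC_FILE_CREATE mode used elsewhere in this
   file.  The binary file is read on process 0 and the rows are distributed as described
   above.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/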
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2076 /*
2077     Not ideal, since it makes two copies of the submatrix: first a SeqAIJ copy
2078   on each process, and then the final result obtained by concatenating the local
2079   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2080 */
2081 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2082 {
2083   PetscErrorCode ierr;
2084   PetscMPIInt    rank,size;
2085   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2086   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2087   Mat            *local,M,Mreuse;
2088   PetscScalar    *vwork,*aa;
2089   MPI_Comm       comm = mat->comm;
2090   Mat_SeqAIJ     *aij;
2091 
2092 
2093   PetscFunctionBegin;
2094   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2095   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2096 
2097   if (call ==  MAT_REUSE_MATRIX) {
2098     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2099     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2100     local = &Mreuse;
2101     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2102   } else {
2103     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2104     Mreuse = *local;
2105     ierr   = PetscFree(local);CHKERRQ(ierr);
2106   }
2107 
2108   /*
2109       m - number of local rows
2110       n - number of columns (same on all processors)
2111       rstart - first row in new global matrix generated
2112   */
2113   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2114   if (call == MAT_INITIAL_MATRIX) {
2115     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2116     ii  = aij->i;
2117     jj  = aij->j;
2118 
2119     /*
2120         Determine the number of non-zeros in the diagonal and off-diagonal
2121         portions of the matrix in order to do correct preallocation
2122     */
2123 
2124     /* first get start and end of "diagonal" columns */
2125     if (csize == PETSC_DECIDE) {
2126       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2127       if (mglobal == n) { /* square matrix */
2128         nlocal = m;
2129       } else {
2130         nlocal = n/size + ((n % size) > rank);
2131       }
2132     } else {
2133       nlocal = csize;
2134     }
2135     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2136     rstart = rend - nlocal;
2137     if (rank == size - 1 && rend != n) {
2138       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2139     }
2140 
2141     /* next, compute all the lengths */
2142     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2143     olens = dlens + m;
2144     for (i=0; i<m; i++) {
2145       jend = ii[i+1] - ii[i];
2146       olen = 0;
2147       dlen = 0;
2148       for (j=0; j<jend; j++) {
2149         if (*jj < rstart || *jj >= rend) olen++;
2150         else dlen++;
2151         jj++;
2152       }
2153       olens[i] = olen;
2154       dlens[i] = dlen;
2155     }
2156     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2157     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2158     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2159     ierr = PetscFree(dlens);CHKERRQ(ierr);
2160   } else {
2161     PetscInt ml,nl;
2162 
2163     M = *newmat;
2164     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2165     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2166     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2167     /*
2168          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2169        rather than the slower MatSetValues().
2170     */
2171     M->was_assembled = PETSC_TRUE;
2172     M->assembled     = PETSC_FALSE;
2173   }
2174   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2175   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2176   ii  = aij->i;
2177   jj  = aij->j;
2178   aa  = aij->a;
2179   for (i=0; i<m; i++) {
2180     row   = rstart + i;
2181     nz    = ii[i+1] - ii[i];
2182     cwork = jj;     jj += nz;
2183     vwork = aa;     aa += nz;
2184     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2185   }
2186 
2187   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2188   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2189   *newmat = M;
2190 
2191   /* save submatrix used in processor for next request */
2192   if (call ==  MAT_INITIAL_MATRIX) {
2193     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2194     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2195   }
2196 
2197   PetscFunctionReturn(0);
2198 }
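/*
   Editor-added usage sketch, assuming the public MatGetSubMatrix() takes the same
   arguments as the routine above: isrow/iscol select the rows and columns kept by this
   process, csize (or PETSC_DECIDE) is the local column size of the result, and
   MAT_REUSE_MATRIX refills a submatrix obtained from an earlier MAT_INITIAL_MATRIX call
   with the same index sets.

     Mat sub;
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/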
2199 
2200 EXTERN_C_BEGIN
2201 #undef __FUNCT__
2202 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2203 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2204 {
2205   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2206   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2207   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2208   const PetscInt *JJ;
2209   PetscScalar    *values;
2210   PetscErrorCode ierr;
2211 
2212   PetscFunctionBegin;
2213 #if defined(PETSC_OPT_g)
2214   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2215 #endif
2216   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2217   o_nnz = d_nnz + m;
2218 
2219   for (i=0; i<m; i++) {
2220     nnz     = I[i+1]- I[i];
2221     JJ      = J + I[i];
2222     nnz_max = PetscMax(nnz_max,nnz);
2223 #if defined(PETSC_OPT_g)
2224     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2225 #endif
2226     for (j=0; j<nnz; j++) {
2227       if (*JJ >= cstart) break;
2228       JJ++;
2229     }
2230     d = 0;
2231     for (; j<nnz; j++) {
2232       if (*JJ++ >= cend) break;
2233       d++;
2234     }
2235     d_nnz[i] = d;
2236     o_nnz[i] = nnz - d;
2237   }
2238   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2239   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2240 
2241   if (v) values = (PetscScalar*)v;
2242   else {
2243     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2244     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2245   }
2246 
2247   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2248   for (i=0; i<m; i++) {
2249     ii   = i + rstart;
2250     nnz  = I[i+1]- I[i];
2251     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2252   }
2253   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2254   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2255   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2256 
2257   if (!v) {
2258     ierr = PetscFree(values);CHKERRQ(ierr);
2259   }
2260   PetscFunctionReturn(0);
2261 }
2262 EXTERN_C_END
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2266 /*@C
2267    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2268    (the default parallel PETSc format).
2269 
2270    Collective on MPI_Comm
2271 
2272    Input Parameters:
2273 +  B - the matrix
2274 .  i - the indices into j for the start of each local row (starts with zero)
2275 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2276 -  v - optional values in the matrix
2277 
2278    Level: developer
2279 
2280 .keywords: matrix, aij, compressed row, sparse, parallel
2281 
2282 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2283 @*/
2284 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2285 {
2286   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2287 
2288   PetscFunctionBegin;
2289   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2290   if (f) {
2291     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2292   }
2293   PetscFunctionReturn(0);
2294 }
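/*
   Editor-added sketch of CSR preallocation; the sizes and index data are illustrative.
   The matrix must already have its type and layout set, i[] holds the local row offsets
   starting at 0, j[] the sorted global column indices of each local row, and v may be
   PETSC_NULL to preallocate without supplying values.

     Mat         B;
     PetscInt    i[3] = {0,2,5};
     PetscInt    j[5] = {0,3,1,2,4};
     PetscScalar v[5] = {1.0,2.0,3.0,4.0,5.0};

     ierr = MatCreate(comm,2,PETSC_DECIDE,PETSC_DETERMINE,5,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/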
2295 
2296 #undef __FUNCT__
2297 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2298 /*@C
2299    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2300    (the default parallel PETSc format).  For good matrix assembly performance
2301    the user should preallocate the matrix storage by setting the parameters
2302    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2303    performance can be increased by more than a factor of 50.
2304 
2305    Collective on MPI_Comm
2306 
2307    Input Parameters:
2308 +  B - the matrix
2309 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2310            (same value is used for all local rows)
2311 .  d_nnz - array containing the number of nonzeros in the various rows of the
2312            DIAGONAL portion of the local submatrix (possibly different for each row)
2313            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2314            The size of this array is equal to the number of local rows, i.e., 'm'.
2315            You must leave room for the diagonal entry even if it is zero.
2316 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2317            submatrix (same value is used for all local rows).
2318 -  o_nnz - array containing the number of nonzeros in the various rows of the
2319            OFF-DIAGONAL portion of the local submatrix (possibly different for
2320            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2321            structure. The size of this array is equal to the number
2322            of local rows, i.e., 'm'.
2323 
2324    The AIJ format (also called the Yale sparse matrix format or
2325    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2326    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2327 
2328    The parallel matrix is partitioned such that the first m0 rows belong to
2329    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2330    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2331 
2332    The DIAGONAL portion of the local submatrix of a processor can be defined
2333    as the submatrix which is obtained by extracting the part corresponding
2334    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2335    first row that belongs to the processor, and r2 is the last row belonging
2336    to this processor. This is a square mxm matrix. The remaining portion
2337    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2338 
2339    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2340 
2341    Example usage:
2342 
2343    Consider the following 8x8 matrix with 34 non-zero values, that is
2344    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2345    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2346    as follows:
2347 
2348 .vb
2349             1  2  0  |  0  3  0  |  0  4
2350     Proc0   0  5  6  |  7  0  0  |  8  0
2351             9  0 10  | 11  0  0  | 12  0
2352     -------------------------------------
2353            13  0 14  | 15 16 17  |  0  0
2354     Proc1   0 18  0  | 19 20 21  |  0  0
2355             0  0  0  | 22 23  0  | 24  0
2356     -------------------------------------
2357     Proc2  25 26 27  |  0  0 28  | 29  0
2358            30  0  0  | 31 32 33  |  0 34
2359 .ve
2360 
2361    This can be represented as a collection of submatrices as:
2362 
2363 .vb
2364       A B C
2365       D E F
2366       G H I
2367 .ve
2368 
2369    Where the submatrices A,B,C are owned by proc0, D,E,F are
2370    owned by proc1, G,H,I are owned by proc2.
2371 
2372    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2373    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2374    The 'M','N' parameters are 8,8, and have the same values on all procs.
2375 
2376    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2377    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2378    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2379    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2380    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2381    matrix, and [DF] as another SeqAIJ matrix.
2382 
2383    When d_nz, o_nz parameters are specified, d_nz storage elements are
2384    allocated for every row of the local diagonal submatrix, and o_nz
2385    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2386    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2387    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2388    In this case, the values of d_nz,o_nz are:
2389 .vb
2390      proc0 : dnz = 2, o_nz = 2
2391      proc1 : dnz = 3, o_nz = 2
2392      proc2 : dnz = 1, o_nz = 4
2393 .ve
2394    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2395    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2396    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2397    34 values.
2398 
2399    When d_nnz, o_nnz parameters are specified, the storage is specified
2400    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2401    In the above case the values for d_nnz,o_nnz are:
2402 .vb
2403      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2404      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2405      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2406 .ve
2407    Here the space allocated is the sum of all the above values, i.e., 34, and
2408    hence the preallocation is exact.
2409 
2410    Level: intermediate
2411 
2412 .keywords: matrix, aij, compressed row, sparse, parallel
2413 
2414 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2415           MPIAIJ
2416 @*/
2417 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2418 {
2419   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2420 
2421   PetscFunctionBegin;
2422   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2423   if (f) {
2424     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2425   }
2426   PetscFunctionReturn(0);
2427 }
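/*
   Editor-added preallocation sketch using the proc0 data of the 8x8 example above: 3 local
   rows and 3 local columns of an 8x8 matrix, with exact per-row counts d_nnz = [2,2,2] for
   the DIAGONAL block and o_nnz = [2,2,2] for the OFF-DIAGONAL block.

     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/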
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatCreateMPIAIJ"
2431 /*@C
2432    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2433    (the default parallel PETSc format).  For good matrix assembly performance
2434    the user should preallocate the matrix storage by setting the parameters
2435    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2436    performance can be increased by more than a factor of 50.
2437 
2438    Collective on MPI_Comm
2439 
2440    Input Parameters:
2441 +  comm - MPI communicator
2442 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
2443            This value should be the same as the local size used in creating the
2444            y vector for the matrix-vector product y = Ax.
2445 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
2446        This value should be the same as the local size used in creating the
2447        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2448 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2449 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2450 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2451            (same value is used for all local rows)
2452 .  d_nnz - array containing the number of nonzeros in the various rows of the
2453            DIAGONAL portion of the local submatrix (possibly different for each row)
2454            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2455            The size of this array is equal to the number of local rows, i.e., 'm'.
2456            You must leave room for the diagonal entry even if it is zero.
2457 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2458            submatrix (same value is used for all local rows).
2459 -  o_nnz - array containing the number of nonzeros in the various rows of the
2460            OFF-DIAGONAL portion of the local submatrix (possibly different for
2461            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2462            structure. The size of this array is equal to the number
2463            of local rows, i.e., 'm'.
2464 
2465    Output Parameter:
2466 .  A - the matrix
2467 
2468    Notes:
2469    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2470    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2471    storage requirements for this matrix.
2472 
2473    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2474    processor then it must be used on all processors that share the object for
2475    that argument.
2476 
2477    The user MUST specify either the local or global matrix dimensions
2478    (possibly both).
2479 
2480    The parallel matrix is partitioned such that the first m0 rows belong to
2481    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2482    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2483 
2484    The DIAGONAL portion of the local submatrix of a processor can be defined
2485    as the submatrix which is obtained by extracting the part corresponding
2486    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2487    first row that belongs to the processor, and r2 is the last row belonging
2488    to this processor. This is a square mxm matrix. The remaining portion
2489    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2490 
2491    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2492 
2493    When calling this routine with a single process communicator, a matrix of
2494    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2495    type of communicator, use the construction mechanism:
2496      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2497 
2498    By default, this format uses inodes (identical nodes) when possible.
2499    We search for consecutive rows with the same nonzero structure, thereby
2500    reusing matrix information to achieve increased efficiency.
2501 
2502    Options Database Keys:
2503 +  -mat_aij_no_inode  - Do not use inodes
2504 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2505 -  -mat_aij_oneindex - Internally use indexing starting at 1
2506         rather than 0.  Note that when calling MatSetValues(),
2507         the user still MUST index entries starting at 0!
2508 
2509 
2510    Example usage:
2511 
2512    Consider the following 8x8 matrix with 34 non-zero values, that is
2513    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2514    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2515    as follows:
2516 
2517 .vb
2518             1  2  0  |  0  3  0  |  0  4
2519     Proc0   0  5  6  |  7  0  0  |  8  0
2520             9  0 10  | 11  0  0  | 12  0
2521     -------------------------------------
2522            13  0 14  | 15 16 17  |  0  0
2523     Proc1   0 18  0  | 19 20 21  |  0  0
2524             0  0  0  | 22 23  0  | 24  0
2525     -------------------------------------
2526     Proc2  25 26 27  |  0  0 28  | 29  0
2527            30  0  0  | 31 32 33  |  0 34
2528 .ve
2529 
2530    This can be represented as a collection of submatrices as:
2531 
2532 .vb
2533       A B C
2534       D E F
2535       G H I
2536 .ve
2537 
2538    Where the submatrices A,B,C are owned by proc0, D,E,F are
2539    owned by proc1, G,H,I are owned by proc2.
2540 
2541    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2542    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2543    The 'M','N' parameters are 8,8, and have the same values on all procs.
2544 
2545    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2546    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2547    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2548    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2549    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2550    matrix, and [DF] as another SeqAIJ matrix.
2551 
2552    When d_nz, o_nz parameters are specified, d_nz storage elements are
2553    allocated for every row of the local diagonal submatrix, and o_nz
2554    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2555    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2556    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2557    In this case, the values of d_nz,o_nz are:
2558 .vb
2559      proc0 : dnz = 2, o_nz = 2
2560      proc1 : dnz = 3, o_nz = 2
2561      proc2 : dnz = 1, o_nz = 4
2562 .ve
2563    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2564    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2565    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2566    34 values.
2567 
2568    When d_nnz, o_nnz parameters are specified, the storage is specified
2569    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2570    In the above case the values for d_nnz,o_nnz are:
2571 .vb
2572      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2573      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2574      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2575 .ve
2576    Here the space allocated is the sum of all the above values, i.e., 34, and
2577    hence the preallocation is exact.
2578 
2579    Level: intermediate
2580 
2581 .keywords: matrix, aij, compressed row, sparse, parallel
2582 
2583 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2584           MPIAIJ
2585 @*/
2586 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2587 {
2588   PetscErrorCode ierr;
2589   PetscMPIInt    size;
2590 
2591   PetscFunctionBegin;
2592   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2593   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2594   if (size > 1) {
2595     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2596     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2597   } else {
2598     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2599     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2600   }
2601   PetscFunctionReturn(0);
2602 }
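/*
   Editor-added creation sketch matching proc0 of the 8x8 example above: 3 local rows,
   3 local columns, global size 8x8, at most 2 nonzeros per row in the DIAGONAL block and
   2 per row in the OFF-DIAGONAL block.  Each process passes its own local sizes and
   counts in the collective call.

     Mat A;
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/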
2603 
2604 #undef __FUNCT__
2605 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2606 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2607 {
2608   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2609 
2610   PetscFunctionBegin;
2611   *Ad     = a->A;
2612   *Ao     = a->B;
2613   *colmap = a->garray;
2614   PetscFunctionReturn(0);
2615 }
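/*
   Editor-added usage sketch for the accessor above: Ad and Ao are the local diagonal and
   off-diagonal SeqAIJ blocks, and colmap[] translates a local column index of Ao into the
   corresponding global column of the parallel matrix, exactly as garray is used in
   MatGetRow_MPIAIJ() and MatNorm_MPIAIJ() above.  The index 0 is illustrative.

     Mat      Ad,Ao;
     PetscInt *colmap,globalcol;

     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
     globalcol = colmap[0];
*/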
2616 
2617 #undef __FUNCT__
2618 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2619 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2620 {
2621   PetscErrorCode ierr;
2622   PetscInt       i;
2623   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2624 
2625   PetscFunctionBegin;
2626   if (coloring->ctype == IS_COLORING_LOCAL) {
2627     ISColoringValue *allcolors,*colors;
2628     ISColoring      ocoloring;
2629 
2630     /* set coloring for diagonal portion */
2631     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2632 
2633     /* set coloring for off-diagonal portion */
2634     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2635     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2636     for (i=0; i<a->B->n; i++) {
2637       colors[i] = allcolors[a->garray[i]];
2638     }
2639     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2640     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2641     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2642     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2643   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2644     ISColoringValue *colors;
2645     PetscInt             *larray;
2646     ISColoring      ocoloring;
2647 
2648     /* set coloring for diagonal portion */
2649     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2650     for (i=0; i<a->A->n; i++) {
2651       larray[i] = i + a->cstart;
2652     }
2653     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2654     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2655     for (i=0; i<a->A->n; i++) {
2656       colors[i] = coloring->colors[larray[i]];
2657     }
2658     ierr = PetscFree(larray);CHKERRQ(ierr);
2659     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2660     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2661     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2662 
2663     /* set coloring for off-diagonal portion */
2664     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2665     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2666     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2667     for (i=0; i<a->B->n; i++) {
2668       colors[i] = coloring->colors[larray[i]];
2669     }
2670     ierr = PetscFree(larray);CHKERRQ(ierr);
2671     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2672     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2673     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2674   } else {
2675     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2676   }
2677 
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2683 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2684 {
2685   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2686   PetscErrorCode ierr;
2687 
2688   PetscFunctionBegin;
2689   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2690   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2691   PetscFunctionReturn(0);
2692 }
2693 
2694 #undef __FUNCT__
2695 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2696 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2697 {
2698   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2699   PetscErrorCode ierr;
2700 
2701   PetscFunctionBegin;
2702   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2703   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2704   PetscFunctionReturn(0);
2705 }
2706 
2707 #undef __FUNCT__
2708 #define __FUNCT__ "MatMerge"
2709 /*@C
2710       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2711                  matrices from each processor
2712 
2713     Collective on MPI_Comm
2714 
2715    Input Parameters:
2716 +    comm - the communicator the parallel matrix will live on
2717 .    inmat - the input sequential matrices
2718 .    n - number of local columns (or PETSC_DECIDE)
2719 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2720 
2721    Output Parameter:
2722 .    outmat - the parallel matrix generated
2723 
2724     Level: advanced
2725 
2726    Notes: The number of columns of the matrix on EACH processor MUST be the same. The input matrix inmat is destroyed by this routine.
2727 
2728 @*/
2729 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2730 {
2731   PetscErrorCode    ierr;
2732   PetscInt          m,N,i,rstart,nnz,I,*dnz,*onz;
2733   const PetscInt    *indx;
2734   const PetscScalar *values;
2735   PetscMap          columnmap,rowmap;
2736 
2737   PetscFunctionBegin;
2738   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2739   /*
2740   PetscMPIInt       rank;
2741   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2742   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2743   */
2744   if (scall == MAT_INITIAL_MATRIX){
2745     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2746     if (n == PETSC_DECIDE){
2747       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2748       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2749       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2750       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2751       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2752     }
2753 
2754     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2755     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2756     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2757     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2758     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2759 
2760     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2761     for (i=0;i<m;i++) {
2762       ierr = MatGetRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2763       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2764       ierr = MatRestoreRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2765     }
2766     /* This routine will ONLY return MPIAIJ type matrix */
2767     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2768     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2769     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2770     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2771 
2772   } else if (scall == MAT_REUSE_MATRIX){
2773     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2774   } else {
2775     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2776   }
2777 
2778   for (i=0;i<m;i++) {
2779     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2780     I    = i + rstart;
2781     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2782     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2783   }
2784   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2785   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2786   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2787 
2788   PetscFunctionReturn(0);
2789 }
2790 
2791 #undef __FUNCT__
2792 #define __FUNCT__ "MatFileSplit"
2793 PetscErrorCode MatFileSplit(Mat A,char *outfile)
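/*
   MatFileSplit - Writes each processor's local rows of A, as a sequential AIJ matrix, to a
   separate binary file named <outfile>.<rank>.  (Descriptive comment and example added for
   clarity; they are not part of the original source.)

   Example (illustrative only):
     ierr = MatFileSplit(A,"Asplit");CHKERRQ(ierr);    produces Asplit.0, Asplit.1, ...
*/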
2794 {
2795   PetscErrorCode    ierr;
2796   PetscMPIInt       rank;
2797   PetscInt          m,N,i,rstart,nnz;
2798   size_t            len;
2799   const PetscInt    *indx;
2800   PetscViewer       out;
2801   char              *name;
2802   Mat               B;
2803   const PetscScalar *values;
2804 
2805   PetscFunctionBegin;
2806   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2807   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2808   /* Should this be the type of the diagonal block of A? */
2809   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2810   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2811   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2812   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2813   for (i=0;i<m;i++) {
2814     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2815     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2816     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2817   }
2818   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2819   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2820 
2821   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2822   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2823   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2824   sprintf(name,"%s.%d",outfile,rank);
2825   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2826   ierr = PetscFree(name);CHKERRQ(ierr);
2827   ierr = MatView(B,out);CHKERRQ(ierr);
2828   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2829   ierr = MatDestroy(B);CHKERRQ(ierr);
2830   PetscFunctionReturn(0);
2831 }
2832 
2833 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2834 #undef __FUNCT__
2835 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2836 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2837 {
2838   PetscErrorCode       ierr;
2839   Mat_Merge_SeqsToMPI  *merge;
2840   PetscObjectContainer container;
2841 
2842   PetscFunctionBegin;
2843   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2844   if (container) {
2845     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2846     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2847     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2848     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2849     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2850     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2851     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2852     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2853     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2854     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2855     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2856     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2857 
2858     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2859     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2860     ierr = PetscFree(merge);CHKERRQ(ierr);
2861   }
2862 
2863   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2864   PetscFunctionReturn(0);
2865 }
2866 
2867 #include "src/mat/utils/freespace.h"
2868 #include "petscbt.h"
2869 #undef __FUNCT__
2870 #define __FUNCT__ "MatMerge_SeqsToMPI"
2871 /*@C
2872       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2873                  matrices from each processor
2874 
2875     Collective on MPI_Comm
2876 
2877    Input Parameters:
2878 +    comm - the communicator the parallel matrix will live on
2879 .    seqmat - the input sequential matrix (one per processor)
2880 .    m - number of local rows (or PETSC_DECIDE)
2881 .    n - number of local columns (or PETSC_DECIDE)
2882 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2883 
2884    Output Parameter:
2885 .    mpimat - the parallel matrix generated
2886 
2887     Level: advanced
2888 
2889    Notes:
2890      The dimensions of the sequential matrix in each processor MUST be the same.
2891      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2892      destroyed when mpimat is destroyed. Use PetscObjectQuery() to access seqmat.
2893 @*/
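/*
   Example (a minimal sketch, not part of the original source): MatMerge_SeqsToMPI() below
   drives the Symbolic/Numeric pair; the first call with MAT_INITIAL_MATRIX builds the parallel
   matrix, later calls with MAT_REUSE_MATRIX redo only the numeric phase.  The names seqmat and
   C are illustrative only.

     Mat seqmat,C;
     ...each process assembles seqmat (same dimensions on every process)...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ...change the numerical values of seqmat, keeping its nonzero pattern...
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/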
2894 static PetscEvent logkey_seqstompinum = 0;
2895 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2896 {
2897   PetscErrorCode       ierr;
2898   MPI_Comm             comm=mpimat->comm;
2899   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2900   PetscMPIInt          size,rank,taga,*len_s;
2901   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2902   PetscInt             proc,m;
2903   PetscInt             **buf_ri,**buf_rj;
2904   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2905   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2906   MPI_Request          *s_waits,*r_waits;
2907   MPI_Status           *status;
2908   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2909   Mat_Merge_SeqsToMPI  *merge;
2910   PetscObjectContainer container;
2911 
2912   PetscFunctionBegin;
2913   if (!logkey_seqstompinum) {
2914     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2915   }
2916   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2917 
2918   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2919   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2920 
2921   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2922   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat does not have an attached MatMergeSeqsToMPI container");
2923   ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2925   bi     = merge->bi;
2926   bj     = merge->bj;
2927   buf_ri = merge->buf_ri;
2928   buf_rj = merge->buf_rj;
2929 
2930   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2931   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2932   len_s  = merge->len_s;
2933 
2934   /* send and recv matrix values */
2935   /*-----------------------------*/
2936   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2937   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2938 
2939   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2940   for (proc=0,k=0; proc<size; proc++){
2941     if (!len_s[proc]) continue;
2942     i = owners[proc];
2943     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2944     k++;
2945   }
2946 
2947   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2948   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2949   ierr = PetscFree(status);CHKERRQ(ierr);
2950 
2951   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2952   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2953 
2954   /* insert mat values of mpimat */
2955   /*----------------------------*/
2956   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2957   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2958   nextrow = buf_ri_k + merge->nrecv;
2959   nextai  = nextrow + merge->nrecv;
2960 
2961   for (k=0; k<merge->nrecv; k++){
2962     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2963     nrows = *(buf_ri_k[k]);
2964     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2965     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
2966   }
2967 
2968   /* set values of ba */
2969   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2970   for (i=0; i<m; i++) {
2971     arow = owners[rank] + i;
2972     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2973     bnzi = bi[i+1] - bi[i];
2974     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2975 
2976     /* add local non-zero vals of this proc's seqmat into ba */
2977     anzi = ai[arow+1] - ai[arow];
2978     aj   = a->j + ai[arow];
2979     aa   = a->a + ai[arow];
2980     nextaj = 0;
2981     for (j=0; nextaj<anzi; j++){
2982       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2983         ba_i[j] += aa[nextaj++];
2984       }
2985     }
2986 
2987     /* add received vals into ba */
2988     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2989       /* i-th row */
2990       if (i == *nextrow[k]) {
2991         anzi = *(nextai[k]+1) - *nextai[k];
2992         aj   = buf_rj[k] + *(nextai[k]);
2993         aa   = abuf_r[k] + *(nextai[k]);
2994         nextaj = 0;
2995         for (j=0; nextaj<anzi; j++){
2996           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2997             ba_i[j] += aa[nextaj++];
2998           }
2999         }
3000         nextrow[k]++; nextai[k]++;
3001       }
3002     }
3003     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3004   }
3005   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3006   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3007 
3008   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3009   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3010   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3011   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3012   PetscFunctionReturn(0);
3013 }
3014 static PetscEvent logkey_seqstompisym = 0;
3015 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3016 {
3017   PetscErrorCode       ierr;
3018   Mat                  B_mpi;
3019   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3020   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3021   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3022   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3023   PetscInt             len,proc,*dnz,*onz;
3024   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3025   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3026   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3027   MPI_Status           *status;
3028   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3029   PetscBT              lnkbt;
3030   Mat_Merge_SeqsToMPI  *merge;
3031   PetscObjectContainer container;
3032 
3033   PetscFunctionBegin;
3034   if (!logkey_seqstompisym) {
3035     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3036   }
3037   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3038 
3039   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3040   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3041 
3042   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3043   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3044 
3045   /* determine row ownership */
3046   /*---------------------------------------------------------*/
3047   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3048   if (m == PETSC_DECIDE) {
3049     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3050   } else {
3051     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3052   }
3053   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3054   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3055   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3056 
3057   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3058   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3059 
3060   /* determine the number of messages to send, their lengths */
3061   /*---------------------------------------------------------*/
3062   len_s  = merge->len_s;
3063 
3064   len = 0;  /* length of buf_si[] */
3065   merge->nsend = 0;
3066   for (proc=0; proc<size; proc++){
3067     len_si[proc] = 0;
3068     if (proc == rank){
3069       len_s[proc] = 0;
3070     } else {
3071       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3072       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
3073     }
3074     if (len_s[proc]) {
3075       merge->nsend++;
3076       nrows = 0;
3077       for (i=owners[proc]; i<owners[proc+1]; i++){
3078         if (ai[i+1] > ai[i]) nrows++;
3079       }
3080       len_si[proc] = 2*(nrows+1);
3081       len += len_si[proc];
3082     }
3083   }
3084 
3085   /* determine the number and length of messages to receive for ij-structure */
3086   /*-------------------------------------------------------------------------*/
3087   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3088   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3089 
3090   /* post the Irecv of j-structure */
3091   /*-------------------------------*/
3092   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3093   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3094 
3095   /* post the Isend of j-structure */
3096   /*--------------------------------*/
3097   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3098   sj_waits = si_waits + merge->nsend;
3099 
3100   for (proc=0, k=0; proc<size; proc++){
3101     if (!len_s[proc]) continue;
3102     i = owners[proc];
3103     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3104     k++;
3105   }
3106 
3107   /* receives and sends of j-structure are complete */
3108   /*------------------------------------------------*/
3109   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3110   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3111 
3112   /* send and recv i-structure */
3113   /*---------------------------*/
3114   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3115   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3116 
3117   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3118   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3119   for (proc=0,k=0; proc<size; proc++){
3120     if (!len_s[proc]) continue;
3121     /* form outgoing message for i-structure:
3122          buf_si[0]:                 nrows to be sent
3123                [1:nrows]:           row index (local to the destination processor)
3124                [nrows+1:2*nrows+1]: i-structure index
3125     */
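    /* For example (illustrative only), sending two nonempty rows with destination-local
       indices 4 and 7 holding 3 and 5 nonzeros gives buf_si = {2, 4, 7, 0, 3, 8}. */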
3126     /*-------------------------------------------*/
3127     nrows = len_si[proc]/2 - 1;
3128     buf_si_i    = buf_si + nrows+1;
3129     buf_si[0]   = nrows;
3130     buf_si_i[0] = 0;
3131     nrows = 0;
3132     for (i=owners[proc]; i<owners[proc+1]; i++){
3133       anzi = ai[i+1] - ai[i];
3134       if (anzi) {
3135         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3136         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3137         nrows++;
3138       }
3139     }
3140     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3141     k++;
3142     buf_si += len_si[proc];
3143   }
3144 
3145   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3146   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3147 
3148   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3149   for (i=0; i<merge->nrecv; i++){
3150     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3151   }
3152 
3153   ierr = PetscFree(len_si);CHKERRQ(ierr);
3154   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3155   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3156   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3157   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3158   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3159   ierr = PetscFree(status);CHKERRQ(ierr);
3160 
3161   /* compute a local seq matrix in each processor */
3162   /*----------------------------------------------*/
3163   /* allocate bi array and free space for accumulating nonzero column info */
3164   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3165   bi[0] = 0;
3166 
3167   /* create and initialize a linked list */
3168   nlnk = N+1;
3169   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3170 
3171   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3172   len = 0;
3173   len  = ai[owners[rank+1]] - ai[owners[rank]];
3174   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3175   current_space = free_space;
3176 
3177   /* determine symbolic info for each local row */
3178   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3179   nextrow = buf_ri_k + merge->nrecv;
3180   nextai  = nextrow + merge->nrecv;
3181   for (k=0; k<merge->nrecv; k++){
3182     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3183     nrows = *buf_ri_k[k];
3184     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3185     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3186   }
3187 
3188   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3189   len = 0;
3190   for (i=0;i<m;i++) {
3191     bnzi   = 0;
3192     /* add local non-zero cols of this proc's seqmat into lnk */
3193     arow   = owners[rank] + i;
3194     anzi   = ai[arow+1] - ai[arow];
3195     aj     = a->j + ai[arow];
3196     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3197     bnzi += nlnk;
3198     /* add received col data into lnk */
3199     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3200       if (i == *nextrow[k]) { /* i-th row */
3201         anzi = *(nextai[k]+1) - *nextai[k];
3202         aj   = buf_rj[k] + *nextai[k];
3203         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3204         bnzi += nlnk;
3205         nextrow[k]++; nextai[k]++;
3206       }
3207     }
3208     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3209 
3210     /* if free space is not available, make more free space */
3211     if (current_space->local_remaining<bnzi) {
3212       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3213       nspacedouble++;
3214     }
3215     /* copy data into free space, then initialize lnk */
3216     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3217     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3218 
3219     current_space->array           += bnzi;
3220     current_space->local_used      += bnzi;
3221     current_space->local_remaining -= bnzi;
3222 
3223     bi[i+1] = bi[i] + bnzi;
3224   }
3225 
3226   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3227 
3228   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3229   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3230   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3231 
3232   /* create symbolic parallel matrix B_mpi */
3233   /*---------------------------------------*/
3234   if (n==PETSC_DECIDE) {
3235     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3236   } else {
3237     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3238   }
3239   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3240   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3241   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3242 
3243   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3244   B_mpi->assembled     = PETSC_FALSE;
3245   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3246   merge->bi            = bi;
3247   merge->bj            = bj;
3248   merge->buf_ri        = buf_ri;
3249   merge->buf_rj        = buf_rj;
3250   merge->coi           = PETSC_NULL;
3251   merge->coj           = PETSC_NULL;
3252   merge->owners_co     = PETSC_NULL;
3253 
3254   /* attach the supporting struct to B_mpi for reuse */
3255   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3256   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3257   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3258   *mpimat = B_mpi;
3259   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3260   PetscFunctionReturn(0);
3261 }
3262 
3263 static PetscEvent logkey_seqstompi = 0;
3264 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3265 {
3266   PetscErrorCode   ierr;
3267 
3268   PetscFunctionBegin;
3269   if (!logkey_seqstompi) {
3270     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3271   }
3272   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3273   if (scall == MAT_INITIAL_MATRIX){
3274     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3275   }
3276   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3277   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3278   PetscFunctionReturn(0);
3279 }
3280 static PetscEvent logkey_getlocalmat = 0;
3281 #undef __FUNCT__
3282 #define __FUNCT__ "MatGetLocalMat"
3283 /*@C
3284      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A (using global column indices)
3285 
3286     Not Collective
3287 
3288    Input Parameters:
3289 +    A - the matrix
3290 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3291 
3292    Output Parameter:
3293 .    A_loc - the local sequential matrix generated
3294 
3295     Level: developer
3296 
3297 @*/
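/*
   Example (a minimal sketch, not part of the original source): extracting the local rows of a
   parallel MPIAIJ matrix A as a sequential matrix and later refreshing only its numerical
   values; the name Aloc is illustrative only.

     Mat Aloc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ...A receives new numerical values with the same nonzero pattern...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/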
3298 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3299 {
3300   PetscErrorCode  ierr;
3301   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3302   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3303   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3304   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3305   PetscInt        am=A->m,i,j,k,ncols,cstart=mpimat->cstart;
3306   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3307 
3308   PetscFunctionBegin;
3309   if (!logkey_getlocalmat) {
3310     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3311   }
3312   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3313   if (scall == MAT_INITIAL_MATRIX){
3314     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3315     ci[0] = 0;
3316     for (i=0; i<am; i++){
3317       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3318     }
3319     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3320     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3321     k = 0;
3322     for (i=0; i<am; i++) {
3323       ncols_o = bi[i+1] - bi[i];
3324       ncols_d = ai[i+1] - ai[i];
3325       ncols   = ncols_d + ncols_o;
3326       /* off-diagonal portion of A */
3327       for (jo=0; jo<ncols_o; jo++) {
3328         col = cmap[*bj];
3329         if (col >= cstart) break;
3330         cj[k]   = col; bj++;
3331         ca[k++] = *ba++;
3332       }
3333       /* diagonal portion of A */
3334       for (j=0; j<ncols_d; j++) {
3335         cj[k]   = cstart + *aj++;
3336         ca[k++] = *aa++;
3337       }
3338       /* off-diagonal portion of A */
3339       for (j=jo;j<ncols_o; j++) {
3340         cj[k]   = cmap[*bj++];
3341         ca[k++] = *ba++;
3342       }
3343     }
3344     /* put together the new matrix */
3345     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3346     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3347     /* Since these are PETSc arrays, change flags to free them as necessary. */
3348     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3349     mat->freedata = PETSC_TRUE;
3350     mat->nonew    = 0;
3351   } else if (scall == MAT_REUSE_MATRIX){
3352     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3353     ci = mat->i; cj = mat->j; ca = mat->a;
3354     for (i=0; i<am; i++) {
3355       /* off-diagonal portion of A */
3356       ncols_o = bi[i+1] - bi[i];
3357       for (jo=0; jo<ncols_o; jo++) {
3358         col = cmap[*bj];
3359         if (col >= cstart) break;
3360         *ca++ = *ba++;
3361       }
3362       /* diagonal portion of A */
3363       ncols = ai[i+1] - ai[i];
3364       for (j=0; j<ncols; j++) *ca++ = *aa++;
3365       /* off-diagonal portion of A */
3366       for (j=jo;j<ncols_o; j++) *ca++ = *ba++;
3367     }
3368   } else {
3369     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3370   }
3371 
3372   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3373   PetscFunctionReturn(0);
3374 }
3375 
3376 static PetscEvent logkey_getlocalmatcondensed = 0;
3377 #undef __FUNCT__
3378 #define __FUNCT__ "MatGetLocalMatCondensed"
3379 /*@C
3380      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all of A's local rows and NON-ZERO columns
3381 
3382     Not Collective
3383 
3384    Input Parameters:
3385 +    A - the matrix
3386 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3387 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3388 
3389    Output Parameter:
3390 .    A_loc - the local sequential matrix generated
3391 
3392     Level: developer
3393 
3394 @*/
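/*
   Example (a minimal sketch, not part of the original source): extracting the condensed local
   matrix using the default row and column index sets; the name Aloc is illustrative only.

     Mat Aloc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
     ...use Aloc...
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/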
3395 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3396 {
3397   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3398   PetscErrorCode    ierr;
3399   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3400   IS                isrowa,iscola;
3401   Mat               *aloc;
3402 
3403   PetscFunctionBegin;
3404   if (!logkey_getlocalmatcondensed) {
3405     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3406   }
3407   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3408   if (!row){
3409     start = a->rstart; end = a->rend;
3410     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3411   } else {
3412     isrowa = *row;
3413   }
3414   if (!col){
3415     start = a->cstart;
3416     cmap  = a->garray;
3417     nzA   = a->A->n;
3418     nzB   = a->B->n;
3419     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3420     ncols = 0;
3421     for (i=0; i<nzB; i++) {
3422       if (cmap[i] < start) idx[ncols++] = cmap[i];
3423       else break;
3424     }
3425     imark = i;
3426     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3427     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3428     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3429     ierr = PetscFree(idx);CHKERRQ(ierr);
3430   } else {
3431     iscola = *col;
3432   }
3433   if (scall != MAT_INITIAL_MATRIX){
3434     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3435     aloc[0] = *A_loc;
3436   }
3437   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3438   *A_loc = aloc[0];
3439   ierr = PetscFree(aloc);CHKERRQ(ierr);
3440   if (!row){
3441     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3442   }
3443   if (!col){
3444     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3445   }
3446   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3447   PetscFunctionReturn(0);
3448 }
3449 
3450 static PetscEvent logkey_GetBrowsOfAcols = 0;
3451 #undef __FUNCT__
3452 #define __FUNCT__ "MatGetBrowsOfAcols"
3453 /*@C
3454     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3455 
3456     Collective on Mat
3457 
3458    Input Parameters:
3459 +    A,B - the matrices in mpiaij format
3460 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3461 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3462 
3463    Output Parameter:
3464 +    rowb, colb - index sets of rows and columns of B to extract
3465 .    brstart - row index of B_seq at which the copies of B's local rows begin
3466 -    B_seq - the sequential matrix generated
3467 
3468     Level: developer
3469 
3470 @*/
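/*
   Example (a minimal sketch, not part of the original source): gathering the rows of B needed
   by the nonzero columns of local A, keeping the index sets so a later call can reuse the
   communication pattern; the names rowb, colb, brstart and Bseq are illustrative only.

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     ...B receives new numerical values with the same nonzero pattern...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr); ierr = ISDestroy(colb);CHKERRQ(ierr);
*/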
3471 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3472 {
3473   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3474   PetscErrorCode    ierr;
3475   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3476   IS                isrowb,iscolb;
3477   Mat               *bseq;
3478 
3479   PetscFunctionBegin;
3480   if (a->cstart != b->rstart || a->cend != b->rend){
3481     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3482   }
3483   if (!logkey_GetBrowsOfAcols) {
3484     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3485   }
3486   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3487 
3488   if (scall == MAT_INITIAL_MATRIX){
3489     start = a->cstart;
3490     cmap  = a->garray;
3491     nzA   = a->A->n;
3492     nzB   = a->B->n;
3493     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3494     ncols = 0;
3495     for (i=0; i<nzB; i++) {  /* row < local row index */
3496       if (cmap[i] < start) idx[ncols++] = cmap[i];
3497       else break;
3498     }
3499     imark = i;
3500     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3501     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3502     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3503     ierr = PetscFree(idx);CHKERRQ(ierr);
3504     *brstart = imark;
3505     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3506   } else {
3507     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3508     isrowb = *rowb; iscolb = *colb;
3509     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3510     bseq[0] = *B_seq;
3511   }
3512   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3513   *B_seq = bseq[0];
3514   ierr = PetscFree(bseq);CHKERRQ(ierr);
3515   if (!rowb){
3516     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3517   } else {
3518     *rowb = isrowb;
3519   }
3520   if (!colb){
3521     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3522   } else {
3523     *colb = iscolb;
3524   }
3525   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3526   PetscFunctionReturn(0);
3527 }
3528 
3529 static PetscEvent logkey_GetBrowsOfAocols = 0;
3530 #undef __FUNCT__
3531 #define __FUNCT__ "MatGetBrowsOfAoCols"
3532 /*@C
3533     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3534     of the OFF-DIAGONAL portion of local A
3535 
3536     Collective on Mat
3537 
3538    Input Parameters:
3539 +    A,B - the matrices in mpiaij format
3540 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3541 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3542 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3543 
3544    Output Parameter:
3545 .    B_oth - the sequential matrix generated
3546 
3547     Level: developer
3548 
3549 @*/
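/*
   Example (a minimal sketch, not part of the original source): gathering the rows of B that
   correspond to the off-diagonal columns of local A, keeping the j-array offsets and the send
   buffer so a later call can reuse them; the names startsj, bufa and Both are illustrative only.

     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         Both;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
     ...B receives new numerical values with the same nonzero pattern...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
*/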
3550 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3551 {
3552   VecScatter_MPI_General *gen_to,*gen_from;
3553   PetscErrorCode    ierr;
3554   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3555   Mat_SeqAIJ        *b_oth;
3556   VecScatter        ctx=a->Mvctx;
3557   MPI_Comm          comm=ctx->comm;
3558   PetscMPIInt       *rprocs,*sprocs,tag=ctx->tag,rank;
3559   PetscInt          *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3560   PetscScalar       *rvalues,*svalues,*b_otha,*bufa,*bufA;
3561   PetscInt          i,j,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj,*sstarts,*sstartsj,len;
3562   MPI_Request       *rwaits,*swaits;
3563   MPI_Status        *sstatus,rstatus;
3564   const PetscInt    *cols;
3565   const PetscScalar *vals;
3566 
3567   PetscFunctionBegin;
3568   if (a->cstart != b->rstart || a->cend != b->rend){
3569     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3570   }
3571   if (!logkey_GetBrowsOfAocols) {
3572     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3573   }
3574   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3575   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3576 
3577   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3578   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3579   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3580   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3581   nrecvs   = gen_from->n;
3582   nsends   = gen_to->n;
3583   rwaits   = gen_from->requests;
3584   swaits   = gen_to->requests;
3585   rrow     = gen_from->indices; /* local row index to be received */
3586   srow     = gen_to->indices;   /* local row index to be sent */
3587   rstarts  = gen_from->starts;
3588   sstarts  = gen_to->starts;
3589   rprocs   = gen_from->procs;
3590   sprocs   = gen_to->procs;
3591   sstatus  = gen_to->sstatus;
3592 
3593   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3594   if (scall == MAT_INITIAL_MATRIX){
3595     /* i-array */
3596     /*---------*/
3597     /*  post receives */
3598     for (i=0; i<nrecvs; i++){
3599       rowlen = (PetscInt*)rvalues + rstarts[i];
3600       nrows = rstarts[i+1]-rstarts[i];
3601       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3602     }
3603 
3604     /* pack the outgoing message */
3605     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3606     rstartsj = sstartsj + nsends +1;
3607     sstartsj[0] = 0;  rstartsj[0] = 0;
3608     len = 0; /* total length of j or a array to be sent */
3609     k = 0;
3610     for (i=0; i<nsends; i++){
3611       rowlen = (PetscInt*)svalues + sstarts[i];
3612       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3613       for (j=0; j<nrows; j++) {
3614         row = srow[k] + b->rowners[rank]; /* global row idx */
3615         ierr = MatGetRow(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3616         len += rowlen[j];
3617         ierr = MatRestoreRow(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3618         k++;
3619       }
3620       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3621        sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3622     }
3623     /* recvs and sends of i-array are completed */
3624     i = nrecvs;
3625     while (i--) {
3626       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3627     }
3628     if (nsends) {
3629       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3630     }
3631     /* allocate buffers for sending j and a arrays */
3632     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3633     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3634 
3635     /* create i-array of B_oth */
3636     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3637     b_othi[0] = 0;
3638     len = 0; /* total length of j or a array to be received */
3639     k = 0;
3640     for (i=0; i<nrecvs; i++){
3641       rowlen = (PetscInt*)rvalues + rstarts[i];
3642       nrows = rstarts[i+1]-rstarts[i];
3643       for (j=0; j<nrows; j++) {
3644         b_othi[k+1] = b_othi[k] + rowlen[j];
3645         len += rowlen[j]; k++;
3646       }
3647       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3648     }
3649 
3650     /* allocate space for j and a arrrays of B_oth */
3651     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3652     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3653 
3654     /* j-array */
3655     /*---------*/
3656     /*  post receives of j-array */
3657     for (i=0; i<nrecvs; i++){
3658       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3659       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3660     }
3661     k = 0;
3662     for (i=0; i<nsends; i++){
3663       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3664       bufJ = bufj+sstartsj[i];
3665       for (j=0; j<nrows; j++) {
3666         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3667         ierr = MatGetRow(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3668         for (l=0; l<ncols; l++){
3669           *bufJ++ = cols[l];
3670         }
3671         ierr = MatRestoreRow(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3672       }
3673       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3674     }
3675 
3676     /* recvs and sends of j-array are completed */
3677     i = nrecvs;
3678     while (i--) {
3679       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3680     }
3681     if (nsends) {
3682       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3683     }
3684   } else if (scall == MAT_REUSE_MATRIX){
3685     sstartsj = *startsj;
3686     rstartsj = sstartsj + nsends +1;
3687     bufa     = *bufa_ptr;
3688     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3689     b_otha   = b_oth->a;
3690   } else {
3691     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3692   }
3693 
3694   /* a-array */
3695   /*---------*/
3696   /*  post receives of a-array */
3697   for (i=0; i<nrecvs; i++){
3698     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3699     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3700   }
3701   k = 0;
3702   for (i=0; i<nsends; i++){
3703     nrows = sstarts[i+1]-sstarts[i];
3704     bufA = bufa+sstartsj[i];
3705     for (j=0; j<nrows; j++) {
3706       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3707       ierr = MatGetRow(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3708       for (l=0; l<ncols; l++){
3709         *bufA++ = vals[l];
3710       }
3711       ierr = MatRestoreRow(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3712 
3713     }
3714     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3715   }
3716   /* recvs and sends of a-array are completed */
3717   i = nrecvs;
3718   while (i--) {
3719     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3720   }
3721   if (nsends) {
3722     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3723   }
3724 
3725   if (scall == MAT_INITIAL_MATRIX){
3726     /* put together the new matrix */
3727     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3728 
3729     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3730     /* Since these are PETSc arrays, change flags to free them as necessary. */
3731     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3732     b_oth->freedata = PETSC_TRUE;
3733     b_oth->nonew    = 0;
3734 
3735     ierr = PetscFree(bufj);CHKERRQ(ierr);
3736     if (!startsj || !bufa_ptr){
3737       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3738       ierr = PetscFree(bufa);CHKERRQ(ierr);
3739     } else {
3740       *startsj  = sstartsj;
3741       *bufa_ptr = bufa;
3742     }
3743   }
3744   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3745 
3746   PetscFunctionReturn(0);
3747 }
3748 
3749 /*MC
3750    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3751 
3752    Options Database Keys:
3753 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3754 
3755   Level: beginner
3756 
3757 .seealso: MatCreateMPIAIJ
3758 M*/
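/*
   Example (a minimal sketch, not part of the original source): selecting the "mpiaij" type for
   a matrix created with MatCreate(); the dimension variables m,n,M,N are illustrative only.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);        or: -mat_type mpiaij with MatSetFromOptions(A)
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/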
3759 
3760 EXTERN_C_BEGIN
3761 #undef __FUNCT__
3762 #define __FUNCT__ "MatCreate_MPIAIJ"
3763 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3764 {
3765   Mat_MPIAIJ     *b;
3766   PetscErrorCode ierr;
3767   PetscInt       i;
3768   PetscMPIInt    size;
3769 
3770   PetscFunctionBegin;
3771   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3772 
3773   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3774   B->data         = (void*)b;
3775   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3776   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3777   B->factor       = 0;
3778   B->assembled    = PETSC_FALSE;
3779   B->mapping      = 0;
3780 
3781   B->insertmode      = NOT_SET_VALUES;
3782   b->size            = size;
3783   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3784 
3785   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3786   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3787 
3788   /* the information in the maps duplicates the information computed below, eventually
3789      we should remove the duplicate information that is not contained in the maps */
3790   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3791   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3792 
3793   /* build local table of row and column ownerships */
3794   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3795   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3796   b->cowners = b->rowners + b->size + 2;
3797   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3798   b->rowners[0] = 0;
3799   for (i=2; i<=b->size; i++) {
3800     b->rowners[i] += b->rowners[i-1];
3801   }
3802   b->rstart = b->rowners[b->rank];
3803   b->rend   = b->rowners[b->rank+1];
3804   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3805   b->cowners[0] = 0;
3806   for (i=2; i<=b->size; i++) {
3807     b->cowners[i] += b->cowners[i-1];
3808   }
3809   b->cstart = b->cowners[b->rank];
3810   b->cend   = b->cowners[b->rank+1];
3811 
3812   /* build cache for off array entries formed */
3813   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3814   b->donotstash  = PETSC_FALSE;
3815   b->colmap      = 0;
3816   b->garray      = 0;
3817   b->roworiented = PETSC_TRUE;
3818 
3819   /* stuff used for matrix vector multiply */
3820   b->lvec      = PETSC_NULL;
3821   b->Mvctx     = PETSC_NULL;
3822 
3823   /* stuff for MatGetRow() */
3824   b->rowindices   = 0;
3825   b->rowvalues    = 0;
3826   b->getrowactive = PETSC_FALSE;
3827 
3828   /* Explicitly create 2 MATSEQAIJ matrices. */
3829   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3830   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3831   PetscLogObjectParent(B,b->A);
3832   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3833   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3834   PetscLogObjectParent(B,b->B);
3835 
3836   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3837                                      "MatStoreValues_MPIAIJ",
3838                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3839   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3840                                      "MatRetrieveValues_MPIAIJ",
3841                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3842   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3843 				     "MatGetDiagonalBlock_MPIAIJ",
3844                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3845   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3846 				     "MatIsTranspose_MPIAIJ",
3847 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3848   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3849 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3850 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3851   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3852 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3853 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3854   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3855 				     "MatDiagonalScaleLocal_MPIAIJ",
3856 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3857   PetscFunctionReturn(0);
3858 }
3859 EXTERN_C_END
3860