xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a1dbd1ada563686aa79d3c8a365b3c1540de08e7)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9 a slightly higher hash table cost; without it, it is not scalable (each processor
10 has an order-N integer array) but is faster to access.
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
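/*
  The following macros inline the insertion of a single (row,col,value) pair into the
  sequential AIJ (compressed sparse row) storage of the diagonal block (A) and the
  off-diagonal block (B) of the parallel matrix.  A short binary search narrows the
  window within the row, a linear scan locates the column, and ADD_VALUES or
  INSERT_VALUES semantics are applied.  If the column is not yet present and nonew
  permits new nonzeros, the later entries of the row are shifted up; when the row has
  no free slot left, the row storage is reallocated with CHUNKSIZE extra entries for
  this row.
*/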
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (PetscInt*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (PetscInt*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
183 
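/*
  MatSetValues for the parallel AIJ format.  Entries whose row is owned by this process
  are inserted directly: columns inside [cstart,cend) go into the diagonal block aij->A,
  all other columns go into the off-diagonal block aij->B (after the global column index
  is translated to a local one through colmap/garray once the matrix has been assembled).
  Entries for rows owned by other processes are placed in the stash and communicated
  during assembly, unless MAT_IGNORE_OFF_PROC_ENTRIES was set.

  A typical caller-side sequence (sketch only; row, col and value are example names):

     ierr = MatSetValues(mat,1,&row,1,&col,&value,ADD_VALUES);CHKERRQ(ierr);
     ...
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/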
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar    value;
190   PetscErrorCode ierr;
191   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
192   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth     roworiented = aij->roworiented;
194 
195   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
196   Mat            A = aij->A;
197   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
198   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar    *aa = a->a;
200   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat            B = aij->B;
202   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
203   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar    *ba = b->a;
205 
206   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   PetscInt       nonew = a->nonew,shift=0;
208   PetscScalar    *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
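/*
  MatGetValues for the parallel AIJ format.  Only locally owned rows may be queried.
  Columns inside the diagonal block are read from aij->A; all other columns are looked
  up through colmap/garray and read from aij->B, with 0.0 returned for entries that are
  not stored there.
*/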
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
278   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
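/*
  Begin assembling the matrix: verify (via an allreduce) that all processes are using a
  compatible insert mode, then start scattering the stashed entries destined for other
  processes.  If MAT_IGNORE_OFF_PROC_ENTRIES was set there is nothing to communicate.
*/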
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   PetscInt       nstash,reallocs;
322   InsertMode     addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processes are in the same mode (either INSERT_VALUES or ADD_VALUES) */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 
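/*
  Finish assembling the matrix: receive the stashed entries and insert them row by row
  with MatSetValues_MPIAIJ(), assemble the diagonal block, check (via an allreduce)
  whether any process disassembled its off-diagonal block so that this process can do
  the same, build the ghost-value scatter with MatSetUpMultiply_MPIAIJ() on the first
  final assembly, and finally assemble the off-diagonal block.
*/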
343 #undef __FUNCT__
344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
348   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
349   PetscErrorCode ierr;
350   PetscMPIInt    n;
351   PetscInt       i,j,rstart,ncols,flg;
352   PetscInt       *row,*col,other_disassembled;
353   PetscScalar    *val;
354   InsertMode     addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for (i=0; i<n;) {
363         /* Now identify the consecutive values belonging to the same row */
364         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
373   }
374 
375   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled; if so, we must
379      also disassemble ourselves, in order that we may reassemble. */
380   /*
381      if the nonzero structure of the submatrix B cannot change then we know that
382      no processor disassembled, thus we can skip this check
383   */
384   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
388       /* reaccess the b because aij->B was changed */
389       b    = (Mat_SeqAIJ *)aij->B->data;
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403 
404   /* used by MatAXPY() */
405   a->xtoy = 0; b->xtoy = 0;
406   a->XtoY = 0; b->XtoY = 0;
407 
408   PetscFunctionReturn(0);
409 }
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
413 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
414 {
415   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
416   PetscErrorCode ierr;
417 
418   PetscFunctionBegin;
419   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
420   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 
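/*
  Zero the rows listed in the (globally numbered) index set.  Each process first
  determines which process owns each index, exchanges message counts with PetscMaxSum(),
  and ships the indices to their owners with nonblocking sends/receives.  The owners
  convert the received indices to local numbering and delegate the actual zeroing to the
  sequential blocks; B is zeroed before A because the rectangular-diagonal case below may
  insert values into B.
*/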
424 #undef __FUNCT__
425 #define __FUNCT__ "MatZeroRows_MPIAIJ"
426 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
427 {
428   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
429   PetscErrorCode ierr;
430   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
431   PetscInt       i,N,*rows,*owners = l->rowners;
432   PetscInt       *nprocs,j,idx,nsends,row;
433   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
434   PetscInt       *rvalues,count,base,slen,*source;
435   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
436   MPI_Comm       comm = A->comm;
437   MPI_Request    *send_waits,*recv_waits;
438   MPI_Status     recv_status,*send_status;
439   IS             istmp;
440   PetscTruth     found;
441 
442   PetscFunctionBegin;
443   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
444   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
445 
446   /*  first count number of contributors to each processor */
447   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
448   ierr   = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
450   for (i=0; i<N; i++) {
451     idx = rows[i];
452     found = PETSC_FALSE;
453     for (j=0; j<size; j++) {
454       if (idx >= owners[j] && idx < owners[j+1]) {
455         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
456       }
457     }
458     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
459   }
460   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
461 
462   /* inform other processors of number of messages and max length*/
463   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
464 
465   /* post receives:   */
466   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
467   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
468   for (i=0; i<nrecvs; i++) {
469     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
470   }
471 
472   /* do sends:
473       1) starts[i] gives the starting index in svalues for the row indices going to
474          the ith processor
475   */
476   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
477   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
478   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
479   starts[0] = 0;
480   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
481   for (i=0; i<N; i++) {
482     svalues[starts[owner[i]]++] = rows[i];
483   }
484   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
485 
486   starts[0] = 0;
487   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
488   count = 0;
489   for (i=0; i<size; i++) {
490     if (nprocs[2*i+1]) {
491       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
492     }
493   }
494   ierr = PetscFree(starts);CHKERRQ(ierr);
495 
496   base = owners[rank];
497 
498   /*  wait on receives */
499   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
500   source = lens + nrecvs;
501   count  = nrecvs; slen = 0;
502   while (count) {
503     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
504     /* unpack receives into our local space */
505     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
506     source[imdex]  = recv_status.MPI_SOURCE;
507     lens[imdex]    = n;
508     slen          += n;
509     count--;
510   }
511   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
512 
513   /* convert the received global row indices to local numbering */
514   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
515   count = 0;
516   for (i=0; i<nrecvs; i++) {
517     values = rvalues + i*nmax;
518     for (j=0; j<lens[i]; j++) {
519       lrows[count++] = values[j] - base;
520     }
521   }
522   ierr = PetscFree(rvalues);CHKERRQ(ierr);
523   ierr = PetscFree(lens);CHKERRQ(ierr);
524   ierr = PetscFree(owner);CHKERRQ(ierr);
525   ierr = PetscFree(nprocs);CHKERRQ(ierr);
526 
527   /* actually zap the local rows */
528   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
529   PetscLogObjectParent(A,istmp);
530 
531   /*
532         Zero the required rows. If the "diagonal block" of the matrix
533      is square and the user wishes to set the diagonal, we use separate
534      code so that MatSetValues() is not called for each diagonal entry,
535      which would allocate new memory each time, causing many mallocs and slowing things down.
536 
537        Contributed by: Mathew Knepley
538   */
539   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
540   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
541   if (diag && (l->A->M == l->A->N)) {
542     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
543   } else if (diag) {
544     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
545     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
546       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
547 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
548     }
549     for (i = 0; i < slen; i++) {
550       row  = lrows[i] + rstart;
551       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
552     }
553     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555   } else {
556     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
557   }
558   ierr = ISDestroy(istmp);CHKERRQ(ierr);
559   ierr = PetscFree(lrows);CHKERRQ(ierr);
560 
561   /* wait on sends */
562   if (nsends) {
563     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
564     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
565     ierr = PetscFree(send_status);CHKERRQ(ierr);
566   }
567   ierr = PetscFree(send_waits);CHKERRQ(ierr);
568   ierr = PetscFree(svalues);CHKERRQ(ierr);
569 
570   PetscFunctionReturn(0);
571 }
572 
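/*
  Parallel matrix-vector product:  y_local = A_diag * x_local + B_off * x_ghost.
  The ghost values x_ghost are gathered into a->lvec with a VecScatter; the scatter is
  started before the diagonal-block product so that communication overlaps computation.
*/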
573 #undef __FUNCT__
574 #define __FUNCT__ "MatMult_MPIAIJ"
575 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
576 {
577   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
578   PetscErrorCode ierr;
579   PetscInt       nt;
580 
581   PetscFunctionBegin;
582   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
583   if (nt != A->n) {
584     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
585   }
586   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
588   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
589   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
590   PetscFunctionReturn(0);
591 }
592 
593 #undef __FUNCT__
594 #define __FUNCT__ "MatMultAdd_MPIAIJ"
595 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
596 {
597   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
598   PetscErrorCode ierr;
599 
600   PetscFunctionBegin;
601   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
603   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
604   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
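/*
  Parallel transpose product y = A^T x.  The off-diagonal contribution B^T * x_local is
  computed into a->lvec and scattered back (SCATTER_REVERSE with ADD_VALUES) to the
  owning processes, overlapped with the local product A_diag^T * x_local.
*/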
608 #undef __FUNCT__
609 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
610 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
611 {
612   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
613   PetscErrorCode ierr;
614 
615   PetscFunctionBegin;
616   /* do nondiagonal part */
617   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
618   /* send it on its way */
619   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
620   /* do local part */
621   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
622   /* receive remote parts: note this assumes the values are not actually */
623   /* inserted into yy until VecScatterEnd() below, which is true for this */
624   /* implementation but is perhaps not always true. */
625   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
626   PetscFunctionReturn(0);
627 }
628 
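/*
  Test whether Bmat equals the transpose of Amat.  The diagonal blocks are compared
  locally first (a cheap necessary condition); if that passes on a multi-process
  communicator, the columns owned by other processes are extracted from both matrices
  with MatGetSubMatrices() and compared as well.
*/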
629 EXTERN_C_BEGIN
630 #undef __FUNCT__
631 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
632 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f)
633 {
634   MPI_Comm       comm;
635   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
636   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
637   IS             Me,Notme;
638   PetscErrorCode ierr;
639   PetscInt       M,N,first,last,*notme,i;
640   PetscMPIInt    size;
641 
642   PetscFunctionBegin;
643 
644   /* Easy test: symmetric diagonal block */
645   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
646   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
647   if (!*f) PetscFunctionReturn(0);
648   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
649   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
650   if (size == 1) PetscFunctionReturn(0);
651 
652   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
653   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
654   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
655   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
656   for (i=0; i<first; i++) notme[i] = i;
657   for (i=last; i<M; i++) notme[i-last+first] = i;
658   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
659   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
660   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
661   Aoff = Aoffs[0];
662   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
663   Boff = Boffs[0];
664   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
665   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
666   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
667   ierr = ISDestroy(Me);CHKERRQ(ierr);
668   ierr = ISDestroy(Notme);CHKERRQ(ierr);
669 
670   PetscFunctionReturn(0);
671 }
672 EXTERN_C_END
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
676 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
677 {
678   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
679   PetscErrorCode ierr;
680 
681   PetscFunctionBegin;
682   /* do nondiagonal part */
683   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
684   /* send it on its way */
685   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
686   /* do local part */
687   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
688   /* receive remote parts: note this assumes the values are not actually */
689   /* inserted into zz until VecScatterEnd() below, which is true for this */
690   /* implementation but is perhaps not always true. */
691   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 /*
696   This only works correctly for square matrices where the subblock A->A is the
697    diagonal block
698 */
699 #undef __FUNCT__
700 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
701 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
702 {
703   PetscErrorCode ierr;
704   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
705 
706   PetscFunctionBegin;
707   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
708   if (a->rstart != a->cstart || a->rend != a->cend) {
709     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
710   }
711   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatScale_MPIAIJ"
717 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
718 {
719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
720   PetscErrorCode ierr;
721 
722   PetscFunctionBegin;
723   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
724   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatDestroy_MPIAIJ"
730 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
731 {
732   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
733   PetscErrorCode ierr;
734 
735   PetscFunctionBegin;
736 #if defined(PETSC_USE_LOG)
737   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
738 #endif
739   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
740   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
741   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
742   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
743 #if defined (PETSC_USE_CTABLE)
744   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
745 #else
746   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
747 #endif
748   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
749   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
750   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
751   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
752   ierr = PetscFree(aij);CHKERRQ(ierr);
753 
754   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
755   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
756   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
757   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
758   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
759   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
760   PetscFunctionReturn(0);
761 }
762 
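/*
  Write the matrix to a PETSc binary file.  Process 0 writes a header
  (MAT_FILE_COOKIE, M, N, global number of nonzeros), then the row lengths, the global
  column indices, and the numerical values of every process, receiving each remote
  process's data in turn with matching MPI sends from the other processes.  Within each
  row the diagonal-block entries are merged with the off-diagonal entries so that the
  column indices are written in increasing global order.
*/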
763 #undef __FUNCT__
764 #define __FUNCT__ "MatView_MPIAIJ_Binary"
765 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
766 {
767   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
768   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
769   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
770   PetscErrorCode    ierr;
771   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
772   int               fd;
773   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
774   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
775   PetscScalar       *column_values;
776 
777   PetscFunctionBegin;
778   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
779   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
780   nz   = A->nz + B->nz;
781   if (!rank) {
782     header[0] = MAT_FILE_COOKIE;
783     header[1] = mat->M;
784     header[2] = mat->N;
785     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
786     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
787     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
788     /* get largest number of rows any processor has */
789     rlen = mat->m;
790     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
791     for (i=1; i<size; i++) {
792       rlen = PetscMax(rlen,range[i+1] - range[i]);
793     }
794   } else {
795     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
796     rlen = mat->m;
797   }
798 
799   /* load up the local row counts */
800   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
801   for (i=0; i<mat->m; i++) {
802     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
803   }
804 
805   /* store the row lengths to the file */
806   if (!rank) {
807     MPI_Status status;
808     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
809     for (i=1; i<size; i++) {
810       rlen = range[i+1] - range[i];
811       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
812       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
813     }
814   } else {
815     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
816   }
817   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
818 
819   /* load up the local column indices */
820   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
821   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
822   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
823   cnt  = 0;
824   for (i=0; i<mat->m; i++) {
825     for (j=B->i[i]; j<B->i[i+1]; j++) {
826       if ( (col = garray[B->j[j]]) > cstart) break;
827       column_indices[cnt++] = col;
828     }
829     for (k=A->i[i]; k<A->i[i+1]; k++) {
830       column_indices[cnt++] = A->j[k] + cstart;
831     }
832     for (; j<B->i[i+1]; j++) {
833       column_indices[cnt++] = garray[B->j[j]];
834     }
835   }
836   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
837 
838   /* store the column indices to the file */
839   if (!rank) {
840     MPI_Status status;
841     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
842     for (i=1; i<size; i++) {
843       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
844       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
845       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
846       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
847     }
848   } else {
849     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
850     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
851   }
852   ierr = PetscFree(column_indices);CHKERRQ(ierr);
853 
854   /* load up the local column values */
855   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
856   cnt  = 0;
857   for (i=0; i<mat->m; i++) {
858     for (j=B->i[i]; j<B->i[i+1]; j++) {
859       if ( garray[B->j[j]] > cstart) break;
860       column_values[cnt++] = B->a[j];
861     }
862     for (k=A->i[i]; k<A->i[i+1]; k++) {
863       column_values[cnt++] = A->a[k];
864     }
865     for (; j<B->i[i+1]; j++) {
866       column_values[cnt++] = B->a[j];
867     }
868   }
869   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
870 
871   /* store the column values to the file */
872   if (!rank) {
873     MPI_Status status;
874     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
875     for (i=1; i<size; i++) {
876       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
877       if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
878       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
879       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
880     }
881   } else {
882     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
883     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
884   }
885   ierr = PetscFree(column_values);CHKERRQ(ierr);
886   PetscFunctionReturn(0);
887 }
888 
889 #undef __FUNCT__
890 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
891 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
892 {
893   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
894   PetscErrorCode    ierr;
895   PetscMPIInt       rank = aij->rank,size = aij->size;
896   PetscTruth        isdraw,iascii,flg,isbinary;
897   PetscViewer       sviewer;
898   PetscViewerFormat format;
899 
900   PetscFunctionBegin;
901   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
902   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
903   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
904   if (iascii) {
905     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
906     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
907       MatInfo info;
908       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
909       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
910       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
911       if (flg) {
912         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
913 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
914       } else {
915         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
916 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
917       }
918       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
919       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
920       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
921       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
922       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
923       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
924       PetscFunctionReturn(0);
925     } else if (format == PETSC_VIEWER_ASCII_INFO) {
926       PetscFunctionReturn(0);
927     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
928       PetscFunctionReturn(0);
929     }
930   } else if (isbinary) {
931     if (size == 1) {
932       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
933       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
934     } else {
935       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
936     }
937     PetscFunctionReturn(0);
938   } else if (isdraw) {
939     PetscDraw  draw;
940     PetscTruth isnull;
941     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
942     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
943   }
944 
945   if (size == 1) {
946     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
947     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
948   } else {
949     /* assemble the entire matrix onto first processor. */
950     Mat         A;
951     Mat_SeqAIJ *Aloc;
952     PetscInt         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
953     PetscScalar *a;
954 
955     if (!rank) {
956       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
957     } else {
958       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
959     }
960     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
961     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
962     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
963     PetscLogObjectParent(mat,A);
964 
965     /* copy over the A part */
966     Aloc = (Mat_SeqAIJ*)aij->A->data;
967     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
968     row = aij->rstart;
969     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
973     }
974     aj = Aloc->j;
975     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
976 
977     /* copy over the B part */
978     Aloc = (Mat_SeqAIJ*)aij->B->data;
979     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
980     row  = aij->rstart;
981     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
982     ct   = cols;
983     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
984     for (i=0; i<m; i++) {
985       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
986       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
987     }
988     ierr = PetscFree(ct);CHKERRQ(ierr);
989     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
990     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
991     /*
992        Everyone has to participate in the call to draw the matrix since the graphics waits are
993        synchronized across all processors that share the PetscDraw object
994     */
995     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
996     if (!rank) {
997       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
998       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
999     }
1000     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1001     ierr = MatDestroy(A);CHKERRQ(ierr);
1002   }
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatView_MPIAIJ"
1008 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1009 {
1010   PetscErrorCode ierr;
1011   PetscTruth     iascii,isdraw,issocket,isbinary;
1012 
1013   PetscFunctionBegin;
1014   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1015   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1016   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1017   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1018   if (iascii || isdraw || isbinary || issocket) {
1019     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1020   } else {
1021     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1022   }
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 
1027 
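/*
  Relaxation (SOR/Gauss-Seidel) for the parallel AIJ format.  Only the "local" variants
  (SOR_LOCAL_FORWARD_SWEEP, SOR_LOCAL_BACKWARD_SWEEP, SOR_LOCAL_SYMMETRIC_SWEEP) are
  supported: each outer iteration gathers the ghost values of x, forms the modified
  right-hand side bb1 = bb - B*x_ghost (by scaling lvec by -1 and using the off-diagonal
  block's multadd), and then applies the requested sweep to the diagonal block only.
  True parallel SOR is not supported and generates an error.
*/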
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatRelax_MPIAIJ"
1030 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1031 {
1032   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1033   PetscErrorCode ierr;
1034   Vec            bb1;
1035   PetscScalar    mone=-1.0;
1036 
1037   PetscFunctionBegin;
1038   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1039 
1040   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1041 
1042   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1043     if (flag & SOR_ZERO_INITIAL_GUESS) {
1044       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1045       its--;
1046     }
1047 
1048     while (its--) {
1049       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1050       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1051 
1052       /* update rhs: bb1 = bb - B*x */
1053       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1054       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1055 
1056       /* local sweep */
1057       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1058       CHKERRQ(ierr);
1059     }
1060   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1061     if (flag & SOR_ZERO_INITIAL_GUESS) {
1062       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1063       its--;
1064     }
1065     while (its--) {
1066       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1067       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1068 
1069       /* update rhs: bb1 = bb - B*x */
1070       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1071       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1072 
1073       /* local sweep */
1074       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1075       CHKERRQ(ierr);
1076     }
1077   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1078     if (flag & SOR_ZERO_INITIAL_GUESS) {
1079       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1080       its--;
1081     }
1082     while (its--) {
1083       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1084       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1085 
1086       /* update rhs: bb1 = bb - B*x */
1087       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1088       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1089 
1090       /* local sweep */
1091       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1092       CHKERRQ(ierr);
1093     }
1094   } else {
1095     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1096   }
1097 
1098   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1104 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1105 {
1106   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1107   Mat            A = mat->A,B = mat->B;
1108   PetscErrorCode ierr;
1109   PetscReal      isend[5],irecv[5];
1110 
1111   PetscFunctionBegin;
1112   info->block_size     = 1.0;
1113   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1114   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1115   isend[3] = info->memory;  isend[4] = info->mallocs;
1116   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1117   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1118   isend[3] += info->memory;  isend[4] += info->mallocs;
1119   if (flag == MAT_LOCAL) {
1120     info->nz_used      = isend[0];
1121     info->nz_allocated = isend[1];
1122     info->nz_unneeded  = isend[2];
1123     info->memory       = isend[3];
1124     info->mallocs      = isend[4];
1125   } else if (flag == MAT_GLOBAL_MAX) {
1126     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1127     info->nz_used      = irecv[0];
1128     info->nz_allocated = irecv[1];
1129     info->nz_unneeded  = irecv[2];
1130     info->memory       = irecv[3];
1131     info->mallocs      = irecv[4];
1132   } else if (flag == MAT_GLOBAL_SUM) {
1133     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1134     info->nz_used      = irecv[0];
1135     info->nz_allocated = irecv[1];
1136     info->nz_unneeded  = irecv[2];
1137     info->memory       = irecv[3];
1138     info->mallocs      = irecv[4];
1139   }
1140   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1141   info->fill_ratio_needed = 0;
1142   info->factor_mallocs    = 0;
1143   info->rows_global       = (double)matin->M;
1144   info->columns_global    = (double)matin->N;
1145   info->rows_local        = (double)matin->m;
1146   info->columns_local     = (double)matin->N;
1147 
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 #undef __FUNCT__
1152 #define __FUNCT__ "MatSetOption_MPIAIJ"
1153 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1154 {
1155   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1156   PetscErrorCode ierr;
1157 
1158   PetscFunctionBegin;
1159   switch (op) {
1160   case MAT_NO_NEW_NONZERO_LOCATIONS:
1161   case MAT_YES_NEW_NONZERO_LOCATIONS:
1162   case MAT_COLUMNS_UNSORTED:
1163   case MAT_COLUMNS_SORTED:
1164   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1165   case MAT_KEEP_ZEROED_ROWS:
1166   case MAT_NEW_NONZERO_LOCATION_ERR:
1167   case MAT_USE_INODES:
1168   case MAT_DO_NOT_USE_INODES:
1169   case MAT_IGNORE_ZERO_ENTRIES:
1170     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1171     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1172     break;
1173   case MAT_ROW_ORIENTED:
1174     a->roworiented = PETSC_TRUE;
1175     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1176     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1177     break;
1178   case MAT_ROWS_SORTED:
1179   case MAT_ROWS_UNSORTED:
1180   case MAT_YES_NEW_DIAGONALS:
1181     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1182     break;
1183   case MAT_COLUMN_ORIENTED:
1184     a->roworiented = PETSC_FALSE;
1185     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1186     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1187     break;
1188   case MAT_IGNORE_OFF_PROC_ENTRIES:
1189     a->donotstash = PETSC_TRUE;
1190     break;
1191   case MAT_NO_NEW_DIAGONALS:
1192     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1193   case MAT_SYMMETRIC:
1194   case MAT_STRUCTURALLY_SYMMETRIC:
1195   case MAT_HERMITIAN:
1196   case MAT_SYMMETRY_ETERNAL:
1197     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1198     break;
1199   case MAT_NOT_SYMMETRIC:
1200   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1201   case MAT_NOT_HERMITIAN:
1202   case MAT_NOT_SYMMETRY_ETERNAL:
1203     break;
1204   default:
1205     SETERRQ(PETSC_ERR_SUP,"unknown option");
1206   }
1207   PetscFunctionReturn(0);
1208 }
1209 
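/*
  Return one locally owned row in global numbering.  The entries of the diagonal block
  (shifted by cstart) and of the off-diagonal block (mapped through garray) are merged
  so that the column indices are monotonically increasing; a scratch buffer sized for
  the longest local row is allocated on first use and kept in mat->rowvalues.
*/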
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatGetRow_MPIAIJ"
1212 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1213 {
1214   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1215   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1216   PetscErrorCode ierr;
1217   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1218   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1219   PetscInt       *cmap,*idx_p;
1220 
1221   PetscFunctionBegin;
1222   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1223   mat->getrowactive = PETSC_TRUE;
1224 
1225   if (!mat->rowvalues && (idx || v)) {
1226     /*
1227         allocate enough space to hold information from the longest row.
1228     */
1229     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1230     PetscInt     max = 1,tmp;
1231     for (i=0; i<matin->m; i++) {
1232       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1233       if (max < tmp) { max = tmp; }
1234     }
1235     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1236     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1237   }
1238 
1239   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1240   lrow = row - rstart;
1241 
1242   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1243   if (!v)   {pvA = 0; pvB = 0;}
1244   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1245   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1246   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1247   nztot = nzA + nzB;
1248 
1249   cmap  = mat->garray;
1250   if (v  || idx) {
1251     if (nztot) {
1252       /* Sort by increasing column numbers, assuming A and B already sorted */
1253       PetscInt imark = -1;
1254       if (v) {
1255         *v = v_p = mat->rowvalues;
1256         for (i=0; i<nzB; i++) {
1257           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1258           else break;
1259         }
1260         imark = i;
1261         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1262         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1263       }
1264       if (idx) {
1265         *idx = idx_p = mat->rowindices;
1266         if (imark > -1) {
1267           for (i=0; i<imark; i++) {
1268             idx_p[i] = cmap[cworkB[i]];
1269           }
1270         } else {
1271           for (i=0; i<nzB; i++) {
1272             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1273             else break;
1274           }
1275           imark = i;
1276         }
1277         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1278         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1279       }
1280     } else {
1281       if (idx) *idx = 0;
1282       if (v)   *v   = 0;
1283     }
1284   }
1285   *nz = nztot;
1286   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1287   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1293 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1294 {
1295   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1296 
1297   PetscFunctionBegin;
1298   if (aij->getrowactive == PETSC_FALSE) {
1299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1300   }
1301   aij->getrowactive = PETSC_FALSE;
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
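/*
  Matrix norms.  The Frobenius norm is the square root of the global sum of squared
  entries; the 1-norm is the maximum global column sum, accumulated into a length-N
  work array and then allreduced (hence not memory scalable); the infinity norm is the
  maximum local row sum reduced with MPI_MAX.  The 2-norm is not supported.
*/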
1306 #define __FUNCT__ "MatNorm_MPIAIJ"
1307 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1308 {
1309   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1310   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1311   PetscErrorCode ierr;
1312   PetscInt       i,j,cstart = aij->cstart;
1313   PetscReal      sum = 0.0;
1314   PetscScalar    *v;
1315 
1316   PetscFunctionBegin;
1317   if (aij->size == 1) {
1318     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1319   } else {
1320     if (type == NORM_FROBENIUS) {
1321       v = amat->a;
1322       for (i=0; i<amat->nz; i++) {
1323 #if defined(PETSC_USE_COMPLEX)
1324         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1325 #else
1326         sum += (*v)*(*v); v++;
1327 #endif
1328       }
1329       v = bmat->a;
1330       for (i=0; i<bmat->nz; i++) {
1331 #if defined(PETSC_USE_COMPLEX)
1332         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1333 #else
1334         sum += (*v)*(*v); v++;
1335 #endif
1336       }
1337       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1338       *norm = sqrt(*norm);
1339     } else if (type == NORM_1) { /* max column norm */
1340       PetscReal *tmp,*tmp2;
1341       PetscInt    *jj,*garray = aij->garray;
1342       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1343       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1344       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1345       *norm = 0.0;
1346       v = amat->a; jj = amat->j;
1347       for (j=0; j<amat->nz; j++) {
1348         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1349       }
1350       v = bmat->a; jj = bmat->j;
1351       for (j=0; j<bmat->nz; j++) {
1352         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1353       }
1354       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1355       for (j=0; j<mat->N; j++) {
1356         if (tmp2[j] > *norm) *norm = tmp2[j];
1357       }
1358       ierr = PetscFree(tmp);CHKERRQ(ierr);
1359       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1360     } else if (type == NORM_INFINITY) { /* max row norm */
1361       PetscReal ntemp = 0.0;
1362       for (j=0; j<aij->A->m; j++) {
1363         v = amat->a + amat->i[j];
1364         sum = 0.0;
1365         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1366           sum += PetscAbsScalar(*v); v++;
1367         }
1368         v = bmat->a + bmat->i[j];
1369         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1370           sum += PetscAbsScalar(*v); v++;
1371         }
1372         if (sum > ntemp) ntemp = sum;
1373       }
1374       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1375     } else {
1376       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1377     }
1378   }
1379   PetscFunctionReturn(0);
1380 }
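/*
   Example (illustrative sketch added for exposition; not part of the original source):
   computing the three norms supported by the routine above for an assembled parallel
   matrix A. Each call is collective on the matrix's communicator; NORM_2 is the
   unsupported case rejected above.

       PetscReal nrm;
       ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);   (global square root of the sum of squares)
       ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);           (largest column sum of absolute values)
       ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);    (largest row sum of absolute values)
*/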
1381 
1382 #undef __FUNCT__
1383 #define __FUNCT__ "MatTranspose_MPIAIJ"
1384 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1385 {
1386   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1387   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1388   PetscErrorCode ierr;
1389   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1390   Mat            B;
1391   PetscScalar    *array;
1392 
1393   PetscFunctionBegin;
1394   if (!matout && M != N) {
1395     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1396   }
1397 
1398   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1399   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1400   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1401 
1402   /* copy over the A part */
1403   Aloc = (Mat_SeqAIJ*)a->A->data;
1404   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1405   row = a->rstart;
1406   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1407   for (i=0; i<m; i++) {
1408     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1409     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1410   }
1411   aj = Aloc->j;
1412   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1413 
1414   /* copy over the B part */
1415   Aloc = (Mat_SeqAIJ*)a->B->data;
1416   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1417   row  = a->rstart;
1418   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1419   ct   = cols;
1420   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1421   for (i=0; i<m; i++) {
1422     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1423     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1424   }
1425   ierr = PetscFree(ct);CHKERRQ(ierr);
1426   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1427   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1428   if (matout) {
1429     *matout = B;
1430   } else {
1431     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1432   }
1433   PetscFunctionReturn(0);
1434 }
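/*
   Example (illustrative sketch, not in the original source): out-of-place versus
   in-place transposition with the calling sequence used in this revision, where a
   PETSC_NULL result argument requests the in-place form (square matrices only, as
   checked above).

       Mat At;
       ierr = MatTranspose(A,&At);CHKERRQ(ierr);          (creates At = A^T)
       ierr = MatDestroy(At);CHKERRQ(ierr);
       ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);   (replaces A by A^T; A must be square)
*/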
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1438 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1439 {
1440   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1441   Mat            a = aij->A,b = aij->B;
1442   PetscErrorCode ierr;
1443   PetscInt       s1,s2,s3;
1444 
1445   PetscFunctionBegin;
1446   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1447   if (rr) {
1448     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1449     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1450     /* Overlap communication with computation. */
1451     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1452   }
1453   if (ll) {
1454     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1455     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1456     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1457   }
1458   /* scale  the diagonal block */
1459   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1460 
1461   if (rr) {
1462     /* Do a scatter end and then right scale the off-diagonal block */
1463     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1464     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1465   }
1466 
1467   PetscFunctionReturn(0);
1468 }
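/*
   Example (illustrative sketch, not in the original source): scaling an MPIAIJ matrix
   as diag(l)*A*diag(r). The vectors must conform to the local row/column layout of A,
   as checked above; either argument may be PETSC_NULL to skip that side.

       ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);           (A <- diag(l) A diag(r))
       ierr = MatDiagonalScale(A,PETSC_NULL,r);CHKERRQ(ierr);  (right scaling only)
*/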
1469 
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1473 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1474 {
1475   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1476   PetscErrorCode ierr;
1477 
1478   PetscFunctionBegin;
1479   if (!a->rank) {
1480     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1487 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,PetscInt *bs)
1488 {
1489   PetscFunctionBegin;
1490   *bs = 1;
1491   PetscFunctionReturn(0);
1492 }
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1495 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1496 {
1497   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1498   PetscErrorCode ierr;
1499 
1500   PetscFunctionBegin;
1501   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatEqual_MPIAIJ"
1507 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1508 {
1509   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1510   Mat            a,b,c,d;
1511   PetscTruth     flg;
1512   PetscErrorCode ierr;
1513 
1514   PetscFunctionBegin;
1515   a = matA->A; b = matA->B;
1516   c = matB->A; d = matB->B;
1517 
1518   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1519   if (flg == PETSC_TRUE) {
1520     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1521   }
1522   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatCopy_MPIAIJ"
1528 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1529 {
1530   PetscErrorCode ierr;
1531   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1532   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1533 
1534   PetscFunctionBegin;
1535   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1536   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1537     /* because of the column compression in the off-processor part of the matrix a->B,
1538     /* Because of the column compression in the off-processor part of the matrix a->B,
1539        the number of columns in a->B and b->B may be different; hence we cannot call
1540        MatCopy() directly on the two parts. If need be, a copy more efficient than
1541        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1542        then copying the submatrices. */
1543   } else {
1544     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1545     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1546   }
1547   PetscFunctionReturn(0);
1548 }
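/*
   Example (illustrative sketch, not in the original source): the fast path above is
   taken only when the nonzero patterns are declared identical and both matrices use
   this copy implementation; otherwise the general MatCopy_Basic() path is used.

       ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);       (copies a->A and a->B block-wise)
       ierr = MatCopy(A,B,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  (falls back to MatCopy_Basic())
*/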
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1552 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1553 {
1554   PetscErrorCode ierr;
1555 
1556   PetscFunctionBegin;
1557   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #include "petscblaslapack.h"
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatAXPY_MPIAIJ"
1564 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1565 {
1566   PetscErrorCode ierr;
1567   PetscInt       i;
1568   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1569   PetscBLASInt   bnz,one=1;
1570   Mat_SeqAIJ     *x,*y;
1571 
1572   PetscFunctionBegin;
1573   if (str == SAME_NONZERO_PATTERN) {
1574     x = (Mat_SeqAIJ *)xx->A->data;
1575     y = (Mat_SeqAIJ *)yy->A->data;
1576     bnz = (PetscBLASInt)x->nz;
1577     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1578     x = (Mat_SeqAIJ *)xx->B->data;
1579     y = (Mat_SeqAIJ *)yy->B->data;
1580     bnz = (PetscBLASInt)x->nz;
1581     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1582   } else if (str == SUBSET_NONZERO_PATTERN) {
1583     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1584 
1585     x = (Mat_SeqAIJ *)xx->B->data;
1586     y = (Mat_SeqAIJ *)yy->B->data;
1587     if (y->xtoy && y->XtoY != xx->B) {
1588       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1589       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1590     }
1591     if (!y->xtoy) { /* get xtoy */
1592       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1593       y->XtoY = xx->B;
1594     }
1595     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1596   } else {
1597     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
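/*
   Example (illustrative sketch, not in the original source): Y <- a*X + Y with the
   scalar passed by address, matching the MatAXPY calling sequence used by this revision.

       PetscScalar alpha = 2.0;
       ierr = MatAXPY(&alpha,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/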
1601 
1602 /* -------------------------------------------------------------------*/
1603 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1604        MatGetRow_MPIAIJ,
1605        MatRestoreRow_MPIAIJ,
1606        MatMult_MPIAIJ,
1607 /* 4*/ MatMultAdd_MPIAIJ,
1608        MatMultTranspose_MPIAIJ,
1609        MatMultTransposeAdd_MPIAIJ,
1610        0,
1611        0,
1612        0,
1613 /*10*/ 0,
1614        0,
1615        0,
1616        MatRelax_MPIAIJ,
1617        MatTranspose_MPIAIJ,
1618 /*15*/ MatGetInfo_MPIAIJ,
1619        MatEqual_MPIAIJ,
1620        MatGetDiagonal_MPIAIJ,
1621        MatDiagonalScale_MPIAIJ,
1622        MatNorm_MPIAIJ,
1623 /*20*/ MatAssemblyBegin_MPIAIJ,
1624        MatAssemblyEnd_MPIAIJ,
1625        0,
1626        MatSetOption_MPIAIJ,
1627        MatZeroEntries_MPIAIJ,
1628 /*25*/ MatZeroRows_MPIAIJ,
1629 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_64BIT_INT)
1630        MatLUFactorSymbolic_MPIAIJ_TFS,
1631 #else
1632        0,
1633 #endif
1634        0,
1635        0,
1636        0,
1637 /*30*/ MatSetUpPreallocation_MPIAIJ,
1638        0,
1639        0,
1640        0,
1641        0,
1642 /*35*/ MatDuplicate_MPIAIJ,
1643        0,
1644        0,
1645        0,
1646        0,
1647 /*40*/ MatAXPY_MPIAIJ,
1648        MatGetSubMatrices_MPIAIJ,
1649        MatIncreaseOverlap_MPIAIJ,
1650        MatGetValues_MPIAIJ,
1651        MatCopy_MPIAIJ,
1652 /*45*/ MatPrintHelp_MPIAIJ,
1653        MatScale_MPIAIJ,
1654        0,
1655        0,
1656        0,
1657 /*50*/ MatGetBlockSize_MPIAIJ,
1658        0,
1659        0,
1660        0,
1661        0,
1662 /*55*/ MatFDColoringCreate_MPIAIJ,
1663        0,
1664        MatSetUnfactored_MPIAIJ,
1665        0,
1666        0,
1667 /*60*/ MatGetSubMatrix_MPIAIJ,
1668        MatDestroy_MPIAIJ,
1669        MatView_MPIAIJ,
1670        MatGetPetscMaps_Petsc,
1671        0,
1672 /*65*/ 0,
1673        0,
1674        0,
1675        0,
1676        0,
1677 /*70*/ 0,
1678        0,
1679        MatSetColoring_MPIAIJ,
1680        MatSetValuesAdic_MPIAIJ,
1681        MatSetValuesAdifor_MPIAIJ,
1682 /*75*/ 0,
1683        0,
1684        0,
1685        0,
1686        0,
1687 /*80*/ 0,
1688        0,
1689        0,
1690        0,
1691 /*84*/ MatLoad_MPIAIJ,
1692        0,
1693        0,
1694        0,
1695        0,
1696        0,
1697 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1698        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1699        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1700        MatPtAP_MPIAIJ_MPIAIJ,
1701        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1702 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1703        0,
1704        0,
1705        0};
1706 
1707 /* ----------------------------------------------------------------------------------------*/
1708 
1709 EXTERN_C_BEGIN
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1712 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1713 {
1714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1715   PetscErrorCode ierr;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1719   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1720   PetscFunctionReturn(0);
1721 }
1722 EXTERN_C_END
1723 
1724 EXTERN_C_BEGIN
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1727 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1728 {
1729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1730   PetscErrorCode ierr;
1731 
1732   PetscFunctionBegin;
1733   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1734   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1735   PetscFunctionReturn(0);
1736 }
1737 EXTERN_C_END
1738 
1739 #include "petscpc.h"
1740 EXTERN_C_BEGIN
1741 #undef __FUNCT__
1742 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1743 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1744 {
1745   Mat_MPIAIJ     *b;
1746   PetscErrorCode ierr;
1747   PetscInt       i;
1748 
1749   PetscFunctionBegin;
1750   B->preallocated = PETSC_TRUE;
1751   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1752   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1753   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1754   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1755   if (d_nnz) {
1756     for (i=0; i<B->m; i++) {
1757       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1758     }
1759   }
1760   if (o_nnz) {
1761     for (i=0; i<B->m; i++) {
1762       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1763     }
1764   }
1765   b = (Mat_MPIAIJ*)B->data;
1766   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1767   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1768 
1769   PetscFunctionReturn(0);
1770 }
1771 EXTERN_C_END
1772 
1773 /*MC
1774    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1775 
1776    Options Database Keys:
1777 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1778 
1779   Level: beginner
1780 
1781 .seealso: MatCreateMPIAIJ
1782 M*/
1783 
1784 EXTERN_C_BEGIN
1785 #undef __FUNCT__
1786 #define __FUNCT__ "MatCreate_MPIAIJ"
1787 PetscErrorCode MatCreate_MPIAIJ(Mat B)
1788 {
1789   Mat_MPIAIJ     *b;
1790   PetscErrorCode ierr;
1791   PetscInt       i;
1792   PetscMPIInt    size;
1793 
1794   PetscFunctionBegin;
1795   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1796 
1797   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1798   B->data         = (void*)b;
1799   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1800   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1801   B->factor       = 0;
1802   B->assembled    = PETSC_FALSE;
1803   B->mapping      = 0;
1804 
1805   B->insertmode      = NOT_SET_VALUES;
1806   b->size            = size;
1807   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1808 
1809   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1810   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1811 
1812   /* the information in the maps duplicates the information computed below, eventually
1813      we should remove the duplicate information that is not contained in the maps */
1814   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1815   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1816 
1817   /* build local table of row and column ownerships */
1818   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1819   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1820   b->cowners = b->rowners + b->size + 2;
1821   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1822   b->rowners[0] = 0;
1823   for (i=2; i<=b->size; i++) {
1824     b->rowners[i] += b->rowners[i-1];
1825   }
1826   b->rstart = b->rowners[b->rank];
1827   b->rend   = b->rowners[b->rank+1];
1828   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1829   b->cowners[0] = 0;
1830   for (i=2; i<=b->size; i++) {
1831     b->cowners[i] += b->cowners[i-1];
1832   }
1833   b->cstart = b->cowners[b->rank];
1834   b->cend   = b->cowners[b->rank+1];
1835 
1836   /* build cache for off array entries formed */
1837   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1838   b->donotstash  = PETSC_FALSE;
1839   b->colmap      = 0;
1840   b->garray      = 0;
1841   b->roworiented = PETSC_TRUE;
1842 
1843   /* stuff used for matrix vector multiply */
1844   b->lvec      = PETSC_NULL;
1845   b->Mvctx     = PETSC_NULL;
1846 
1847   /* stuff for MatGetRow() */
1848   b->rowindices   = 0;
1849   b->rowvalues    = 0;
1850   b->getrowactive = PETSC_FALSE;
1851 
1852   /* Explicitly create 2 MATSEQAIJ matrices. */
1853   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1854   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1855   PetscLogObjectParent(B,b->A);
1856   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1857   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1858   PetscLogObjectParent(B,b->B);
1859 
1860   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1861                                      "MatStoreValues_MPIAIJ",
1862                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1863   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1864                                      "MatRetrieveValues_MPIAIJ",
1865                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1866   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1867 				     "MatGetDiagonalBlock_MPIAIJ",
1868                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1869   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1870 				     "MatIsTranspose_MPIAIJ",
1871 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
1872   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1873 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1874 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
1875   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1876 				     "MatDiagonalScaleLocal_MPIAIJ",
1877 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
1878   PetscFunctionReturn(0);
1879 }
1880 EXTERN_C_END
1881 
1882 /*MC
1883    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1884 
1885    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1886    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1887    MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1888    for communicators controlling multiple processes.  It is recommended that you call both of
1889    the above preallocation routines for simplicity.
1890 
1891    Options Database Keys:
1892 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1893 
1894   Level: beginner
1895 
1896 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1897 M*/
1898 
1899 EXTERN_C_BEGIN
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatCreate_AIJ"
1902 PetscErrorCode MatCreate_AIJ(Mat A)
1903 {
1904   PetscErrorCode ierr;
1905   PetscMPIInt    size;
1906 
1907   PetscFunctionBegin;
1908   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1909   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1910   if (size == 1) {
1911     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1912   } else {
1913     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1914   }
1915   PetscFunctionReturn(0);
1916 }
1917 EXTERN_C_END
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1921 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1922 {
1923   Mat            mat;
1924   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1925   PetscErrorCode ierr;
1926 
1927   PetscFunctionBegin;
1928   *newmat       = 0;
1929   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1930   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1931   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1932   a    = (Mat_MPIAIJ*)mat->data;
1933 
1934   mat->factor       = matin->factor;
1935   mat->assembled    = PETSC_TRUE;
1936   mat->insertmode   = NOT_SET_VALUES;
1937   mat->preallocated = PETSC_TRUE;
1938 
1939   a->rstart       = oldmat->rstart;
1940   a->rend         = oldmat->rend;
1941   a->cstart       = oldmat->cstart;
1942   a->cend         = oldmat->cend;
1943   a->size         = oldmat->size;
1944   a->rank         = oldmat->rank;
1945   a->donotstash   = oldmat->donotstash;
1946   a->roworiented  = oldmat->roworiented;
1947   a->rowindices   = 0;
1948   a->rowvalues    = 0;
1949   a->getrowactive = PETSC_FALSE;
1950 
1951   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1952   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1953   if (oldmat->colmap) {
1954 #if defined (PETSC_USE_CTABLE)
1955     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1956 #else
1957     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1958     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1959     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1960 #endif
1961   } else a->colmap = 0;
1962   if (oldmat->garray) {
1963     PetscInt len;
1964     len  = oldmat->B->n;
1965     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1966     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1967     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1968   } else a->garray = 0;
1969 
1970   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1971   PetscLogObjectParent(mat,a->lvec);
1972   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1973   PetscLogObjectParent(mat,a->Mvctx);
1974   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1975   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1976   PetscLogObjectParent(mat,a->A);
1977   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1978   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1979   PetscLogObjectParent(mat,a->B);
1980   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1981   *newmat = mat;
1982   PetscFunctionReturn(0);
1983 }
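/*
   Example (illustrative sketch, not in the original source): duplicating an assembled
   MPIAIJ matrix, either with or without copying its numerical values.

       Mat C;
       ierr = MatDuplicate(A,MAT_COPY_VALUES,&C);CHKERRQ(ierr);
       ierr = MatDestroy(C);CHKERRQ(ierr);
       ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&C);CHKERRQ(ierr);   (same layout and pattern, values zeroed)
*/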
1984 
1985 #include "petscsys.h"
1986 
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatLoad_MPIAIJ"
1989 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1990 {
1991   Mat            A;
1992   PetscScalar    *vals,*svals;
1993   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1994   MPI_Status     status;
1995   PetscErrorCode ierr;
1996   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*rowners,maxnz,mm;
1997   PetscInt       i,nz,j,rstart,rend;
1998   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1999   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
2000   PetscInt       cend,cstart,n;
2001   int            fd;
2002 
2003   PetscFunctionBegin;
2004   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2005   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2006   if (!rank) {
2007     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2008     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2009     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2010     if (header[3] < 0) {
2011       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
2012     }
2013   }
2014 
2015   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2016   M = header[1]; N = header[2];
2017   /* determine ownership of all rows */
2018   m = M/size + ((M % size) > rank);
2019   mm = (PetscMPIInt) m;
2020   ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2021   ierr = MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2022   rowners[0] = 0;
2023   for (i=2; i<=size; i++) {
2024     rowners[i] += rowners[i-1];
2025   }
2026   rstart = rowners[rank];
2027   rend   = rowners[rank+1];
2028 
2029   /* distribute row lengths to all processors */
2030   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
2031   offlens = ourlens + (rend-rstart);
2032   if (!rank) {
2033     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2034     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2035     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2036     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2037     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2038     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2039   } else {
2040     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
2041   }
2042 
2043   if (!rank) {
2044     /* calculate the number of nonzeros on each processor */
2045     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2046     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2047     for (i=0; i<size; i++) {
2048       for (j=rowners[i]; j< rowners[i+1]; j++) {
2049         procsnz[i] += rowlengths[j];
2050       }
2051     }
2052     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2053 
2054     /* determine max buffer needed and allocate it */
2055     maxnz = 0;
2056     for (i=0; i<size; i++) {
2057       maxnz = PetscMax(maxnz,procsnz[i]);
2058     }
2059     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2060 
2061     /* read in my part of the matrix column indices  */
2062     nz   = procsnz[0];
2063     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2064     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2065 
2066     /* read in everyone else's and ship them off */
2067     for (i=1; i<size; i++) {
2068       nz   = procsnz[i];
2069       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2070       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2071     }
2072     ierr = PetscFree(cols);CHKERRQ(ierr);
2073   } else {
2074     /* determine buffer space needed for message */
2075     nz = 0;
2076     for (i=0; i<m; i++) {
2077       nz += ourlens[i];
2078     }
2079     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2080 
2081     /* receive message of column indices*/
2082     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2083     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2084     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2085   }
2086 
2087   /* determine column ownership if matrix is not square */
2088   if (N != M) {
2089     n      = N/size + ((N % size) > rank);
2090     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2091     cstart = cend - n;
2092   } else {
2093     cstart = rstart;
2094     cend   = rend;
2095     n      = cend - cstart;
2096   }
2097 
2098   /* loop over local rows, determining number of off diagonal entries */
2099   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2100   jj = 0;
2101   for (i=0; i<m; i++) {
2102     for (j=0; j<ourlens[i]; j++) {
2103       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2104       jj++;
2105     }
2106   }
2107 
2108   /* create our matrix */
2109   for (i=0; i<m; i++) {
2110     ourlens[i] -= offlens[i];
2111   }
2112   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2113   ierr = MatSetType(A,type);CHKERRQ(ierr);
2114   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2115 
2116   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2117   for (i=0; i<m; i++) {
2118     ourlens[i] += offlens[i];
2119   }
2120 
2121   if (!rank) {
2122     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2123 
2124     /* read in my part of the matrix numerical values  */
2125     nz   = procsnz[0];
2126     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2127 
2128     /* insert into matrix */
2129     jj      = rstart;
2130     smycols = mycols;
2131     svals   = vals;
2132     for (i=0; i<m; i++) {
2133       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2134       smycols += ourlens[i];
2135       svals   += ourlens[i];
2136       jj++;
2137     }
2138 
2139     /* read in the other processors' values and ship them out */
2140     for (i=1; i<size; i++) {
2141       nz   = procsnz[i];
2142       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2143       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2144     }
2145     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2146   } else {
2147     /* receive numeric values */
2148     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2149 
2150     /* receive message of values*/
2151     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2152     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2153     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2154 
2155     /* insert into matrix */
2156     jj      = rstart;
2157     smycols = mycols;
2158     svals   = vals;
2159     for (i=0; i<m; i++) {
2160       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2161       smycols += ourlens[i];
2162       svals   += ourlens[i];
2163       jj++;
2164     }
2165   }
2166   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2167   ierr = PetscFree(vals);CHKERRQ(ierr);
2168   ierr = PetscFree(mycols);CHKERRQ(ierr);
2169   ierr = PetscFree(rowners);CHKERRQ(ierr);
2170 
2171   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2172   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2173   *newmat = A;
2174   PetscFunctionReturn(0);
2175 }
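/*
   Example (illustrative sketch, not in the original source): loading a parallel AIJ
   matrix from a PETSc binary file through the routine above. The binary viewer is
   assumed to have been opened for reading with PetscViewerBinaryOpen(); the name of
   the read-only mode constant varies between PETSc versions, so READ_MODE below is a
   hypothetical placeholder for it.

       PetscViewer fd;
       Mat         A;
       ierr = PetscViewerBinaryOpen(comm,"matrix.dat",READ_MODE,&fd);CHKERRQ(ierr);
       ierr = MatLoad(fd,MATMPIAIJ,&A);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(fd);CHKERRQ(ierr);
*/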
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2179 /*
2180     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
2181   locally, and then the end result is built by concatenating the local matrices.
2182   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2183 */
2184 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2185 {
2186   PetscErrorCode ierr;
2187   PetscMPIInt    rank,size;
2188   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2189   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2190   Mat            *local,M,Mreuse;
2191   PetscScalar    *vwork,*aa;
2192   MPI_Comm       comm = mat->comm;
2193   Mat_SeqAIJ     *aij;
2194 
2195 
2196   PetscFunctionBegin;
2197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2199 
2200   if (call ==  MAT_REUSE_MATRIX) {
2201     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2202     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2203     local = &Mreuse;
2204     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2205   } else {
2206     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2207     Mreuse = *local;
2208     ierr   = PetscFree(local);CHKERRQ(ierr);
2209   }
2210 
2211   /*
2212       m - number of local rows
2213       n - number of columns (same on all processors)
2214       rstart - first row in new global matrix generated
2215   */
2216   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2217   if (call == MAT_INITIAL_MATRIX) {
2218     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2219     ii  = aij->i;
2220     jj  = aij->j;
2221 
2222     /*
2223         Determine the number of non-zeros in the diagonal and off-diagonal
2224         portions of the matrix in order to do correct preallocation
2225     */
2226 
2227     /* first get start and end of "diagonal" columns */
2228     if (csize == PETSC_DECIDE) {
2229       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2230       if (mglobal == n) { /* square matrix */
2231 	nlocal = m;
2232       } else {
2233         nlocal = n/size + ((n % size) > rank);
2234       }
2235     } else {
2236       nlocal = csize;
2237     }
2238     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2239     rstart = rend - nlocal;
2240     if (rank == size - 1 && rend != n) {
2241       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2242     }
2243 
2244     /* next, compute all the lengths */
2245     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2246     olens = dlens + m;
2247     for (i=0; i<m; i++) {
2248       jend = ii[i+1] - ii[i];
2249       olen = 0;
2250       dlen = 0;
2251       for (j=0; j<jend; j++) {
2252         if (*jj < rstart || *jj >= rend) olen++;
2253         else dlen++;
2254         jj++;
2255       }
2256       olens[i] = olen;
2257       dlens[i] = dlen;
2258     }
2259     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2260     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2261     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2262     ierr = PetscFree(dlens);CHKERRQ(ierr);
2263   } else {
2264     PetscInt ml,nl;
2265 
2266     M = *newmat;
2267     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2268     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2269     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2270     /*
2271          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2272        rather than the slower MatSetValues().
2273     */
2274     M->was_assembled = PETSC_TRUE;
2275     M->assembled     = PETSC_FALSE;
2276   }
2277   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2278   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2279   ii  = aij->i;
2280   jj  = aij->j;
2281   aa  = aij->a;
2282   for (i=0; i<m; i++) {
2283     row   = rstart + i;
2284     nz    = ii[i+1] - ii[i];
2285     cwork = jj;     jj += nz;
2286     vwork = aa;     aa += nz;
2287     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2288   }
2289 
2290   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2291   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2292   *newmat = M;
2293 
2294   /* save submatrix used in processor for next request */
2295   if (call ==  MAT_INITIAL_MATRIX) {
2296     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2297     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2298   }
2299 
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2305 /*@C
2306    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2307    (the default parallel PETSc format).  For good matrix assembly performance
2308    the user should preallocate the matrix storage by setting the parameters
2309    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2310    performance can be increased by more than a factor of 50.
2311 
2312    Collective on MPI_Comm
2313 
2314    Input Parameters:
2315 +  A - the matrix
2316 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2317            (same value is used for all local rows)
2318 .  d_nnz - array containing the number of nonzeros in the various rows of the
2319            DIAGONAL portion of the local submatrix (possibly different for each row)
2320            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2321            The size of this array is equal to the number of local rows, i.e 'm'.
2322            You must leave room for the diagonal entry even if it is zero.
2323 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2324            submatrix (same value is used for all local rows).
2325 -  o_nnz - array containing the number of nonzeros in the various rows of the
2326            OFF-DIAGONAL portion of the local submatrix (possibly different for
2327            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2328            structure. The size of this array is equal to the number
2329            of local rows, i.e 'm'.
2330 
2331    The AIJ format (also called the Yale sparse matrix format or
2332    compressed row storage), is fully compatible with standard Fortran 77
2333    storage.  That is, the stored row and column indices can begin at
2334    either one (as in Fortran) or zero.  See the users manual for details.
2335 
2336    The user MUST specify either the local or global matrix dimensions
2337    (possibly both).
2338 
2339    The parallel matrix is partitioned such that the first m0 rows belong to
2340    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2341    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'.
2342 
2343    The DIAGONAL portion of the local submatrix of a processor can be defined
2344    as the submatrix which is obtained by extracting the part corresponding
2345    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2346    first row that belongs to the processor, and r2 is the last row belonging
2347    to this processor. This is a square mxm matrix. The remaining portion
2348    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2349 
2350    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2351 
2352    By default, this format uses inodes (identical nodes) when possible.
2353    We search for consecutive rows with the same nonzero structure, thereby
2354    reusing matrix information to achieve increased efficiency.
2355 
2356    Options Database Keys:
2357 +  -mat_aij_no_inode  - Do not use inodes
2358 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2359 -  -mat_aij_oneindex - Internally use indexing starting at 1
2360         rather than 0.  Note that when calling MatSetValues(),
2361         the user still MUST index entries starting at 0!
2362 
2363    Example usage:
2364 
2365    Consider the following 8x8 matrix with 34 non-zero values that is
2366    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2367    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2368    as follows:
2369 
2370 .vb
2371             1  2  0  |  0  3  0  |  0  4
2372     Proc0   0  5  6  |  7  0  0  |  8  0
2373             9  0 10  | 11  0  0  | 12  0
2374     -------------------------------------
2375            13  0 14  | 15 16 17  |  0  0
2376     Proc1   0 18  0  | 19 20 21  |  0  0
2377             0  0  0  | 22 23  0  | 24  0
2378     -------------------------------------
2379     Proc2  25 26 27  |  0  0 28  | 29  0
2380            30  0  0  | 31 32 33  |  0 34
2381 .ve
2382 
2383    This can be represented as a collection of submatrices as:
2384 
2385 .vb
2386       A B C
2387       D E F
2388       G H I
2389 .ve
2390 
2391    Where the submatrices A,B,C are owned by proc0, D,E,F are
2392    owned by proc1, G,H,I are owned by proc2.
2393 
2394    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2395    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2396    The 'M','N' parameters are 8,8, and have the same values on all procs.
2397 
2398    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2399    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2400    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2401    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2402    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2403    matrix, and [DF] as another SeqAIJ matrix.
2404 
2405    When d_nz, o_nz parameters are specified, d_nz storage elements are
2406    allocated for every row of the local diagonal submatrix, and o_nz
2407    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2408    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2409    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2410    In this case, the values of d_nz,o_nz are:
2411 .vb
2412      proc0 : dnz = 2, o_nz = 2
2413      proc1 : dnz = 3, o_nz = 2
2414      proc2 : dnz = 1, o_nz = 4
2415 .ve
2416    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2417    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2418    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2419    34 values.
2420 
2421    When d_nnz, o_nnz parameters are specified, the storage is specified
2422    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2423    In the above case the values for d_nnz,o_nnz are:
2424 .vb
2425      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2426      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2427      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2428 .ve
2429    Here the space allocated is the sum of all the above values, i.e. 34, and
2430    hence the preallocation is perfect.
2431 
2432    Level: intermediate
2433 
2434 .keywords: matrix, aij, compressed row, sparse, parallel
2435 
2436 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2437 @*/
2438 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2439 {
2440   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2441 
2442   PetscFunctionBegin;
2443   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2444   if (f) {
2445     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2446   }
2447   PetscFunctionReturn(0);
2448 }
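/*
   Example (illustrative sketch, not in the original source): preallocating the 8x8
   matrix described in the manual page above on proc1, which owns rows 3-5, using the
   per-row d_nnz/o_nnz arrays so that no storage is wasted.

       Mat      A;
       PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
       ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/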
2449 
2450 #undef __FUNCT__
2451 #define __FUNCT__ "MatCreateMPIAIJ"
2452 /*@C
2453    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2454    (the default parallel PETSc format).  For good matrix assembly performance
2455    the user should preallocate the matrix storage by setting the parameters
2456    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2457    performance can be increased by more than a factor of 50.
2458 
2459    Collective on MPI_Comm
2460 
2461    Input Parameters:
2462 +  comm - MPI communicator
2463 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2464            This value should be the same as the local size used in creating the
2465            y vector for the matrix-vector product y = Ax.
2466 .  n - This value should be the same as the local size used in creating the
2467        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2468        calculated if N is given) For square matrices n is almost always m.
2469 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2470 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2471 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2472            (same value is used for all local rows)
2473 .  d_nnz - array containing the number of nonzeros in the various rows of the
2474            DIAGONAL portion of the local submatrix (possibly different for each row)
2475            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2476            The size of this array is equal to the number of local rows, i.e 'm'.
2477            You must leave room for the diagonal entry even if it is zero.
2478 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2479            submatrix (same value is used for all local rows).
2480 -  o_nnz - array containing the number of nonzeros in the various rows of the
2481            OFF-DIAGONAL portion of the local submatrix (possibly different for
2482            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2483            structure. The size of this array is equal to the number
2484            of local rows, i.e 'm'.
2485 
2486    Output Parameter:
2487 .  A - the matrix
2488 
2489    Notes:
2490    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2491    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2492    storage requirements for this matrix.
2493 
2494    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2495    processor then it must be used on all processors that share the object for
2496    that argument.
2497 
2498    The AIJ format (also called the Yale sparse matrix format or
2499    compressed row storage), is fully compatible with standard Fortran 77
2500    storage.  That is, the stored row and column indices can begin at
2501    either one (as in Fortran) or zero.  See the users manual for details.
2502 
2503    The user MUST specify either the local or global matrix dimensions
2504    (possibly both).
2505 
2506    The parallel matrix is partitioned such that the first m0 rows belong to
2507    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2508    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'.
2509 
2510    The DIAGONAL portion of the local submatrix of a processor can be defined
2511    as the submatrix which is obtained by extracting the part corresponding
2512    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2513    first row that belongs to the processor, and r2 is the last row belonging
2514    to this processor. This is a square mxm matrix. The remaining portion
2515    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2516 
2517    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2518 
2519    When calling this routine with a single process communicator, a matrix of
2520    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2521    type of communicator, use the construction mechanism:
2522      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2523 
2524    By default, this format uses inodes (identical nodes) when possible.
2525    We search for consecutive rows with the same nonzero structure, thereby
2526    reusing matrix information to achieve increased efficiency.
2527 
2528    Options Database Keys:
2529 +  -mat_aij_no_inode  - Do not use inodes
2530 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2531 -  -mat_aij_oneindex - Internally use indexing starting at 1
2532         rather than 0.  Note that when calling MatSetValues(),
2533         the user still MUST index entries starting at 0!
2534 
2535 
2536    Example usage:
2537 
2538    Consider the following 8x8 matrix with 34 non-zero values that is
2539    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2540    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2541    as follows:
2542 
2543 .vb
2544             1  2  0  |  0  3  0  |  0  4
2545     Proc0   0  5  6  |  7  0  0  |  8  0
2546             9  0 10  | 11  0  0  | 12  0
2547     -------------------------------------
2548            13  0 14  | 15 16 17  |  0  0
2549     Proc1   0 18  0  | 19 20 21  |  0  0
2550             0  0  0  | 22 23  0  | 24  0
2551     -------------------------------------
2552     Proc2  25 26 27  |  0  0 28  | 29  0
2553            30  0  0  | 31 32 33  |  0 34
2554 .ve
2555 
2556    This can be represented as a collection of submatrices as:
2557 
2558 .vb
2559       A B C
2560       D E F
2561       G H I
2562 .ve
2563 
2564    Where the submatrices A,B,C are owned by proc0, D,E,F are
2565    owned by proc1, G,H,I are owned by proc2.
2566 
2567    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2568    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2569    The 'M','N' parameters are 8,8, and have the same values on all procs.
2570 
2571    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2572    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2573    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2574    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2575    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2576    matrix, and [DF] as another SeqAIJ matrix.
2577 
2578    When d_nz, o_nz parameters are specified, d_nz storage elements are
2579    allocated for every row of the local diagonal submatrix, and o_nz
2580    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2581    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2582    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2583    In this case, the values of d_nz,o_nz are:
2584 .vb
2585      proc0 : dnz = 2, o_nz = 2
2586      proc1 : dnz = 3, o_nz = 2
2587      proc2 : dnz = 1, o_nz = 4
2588 .ve
2589    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2590    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2591    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2592    34 values.
2593 
2594    When d_nnz, o_nnz parameters are specified, the storage is specified
2595    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2596    In the above case the values for d_nnz,o_nnz are:
2597 .vb
2598      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2599      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2600      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2601 .ve
2602    Here the space allocated is the sum of all the above values, i.e. 34, and
2603    hence the preallocation is perfect.
2604 
2605    Level: intermediate
2606 
2607 .keywords: matrix, aij, compressed row, sparse, parallel
2608 
2609 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2610 @*/
2611 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2612 {
2613   PetscErrorCode ierr;
2614   PetscMPIInt    size;
2615 
2616   PetscFunctionBegin;
2617   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2618   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2619   if (size > 1) {
2620     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2621     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2622   } else {
2623     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2624     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2625   }
2626   PetscFunctionReturn(0);
2627 }
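/*
   Example (illustrative sketch, not in the original source): the convenience routine
   above combined with a row-wise assembly loop. Each process sets only the rows it
   owns, here letting PETSc choose the row and column partitioning.

       Mat         A;
       PetscInt    Istart,Iend,row,col;
       PetscScalar one = 1.0;
       ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
       ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
       for (row=Istart; row<Iend; row++) {
         col  = row;
         ierr = MatSetValues(A,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
       }
       ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/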
2628 
2629 #undef __FUNCT__
2630 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2631 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2632 {
2633   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2634 
2635   PetscFunctionBegin;
2636   *Ad     = a->A;
2637   *Ao     = a->B;
2638   *colmap = a->garray;
2639   PetscFunctionReturn(0);
2640 }
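/*
   Example (illustrative sketch, not in the original source): retrieving the two SeqAIJ
   blocks and the compressed-column-to-global map garray used throughout this file.
   Column j of the off-diagonal block Ao corresponds to global column colmap[j].

       Mat      Ad,Ao;
       PetscInt *colmap;
       ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/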
2641 
2642 #undef __FUNCT__
2643 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2644 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2645 {
2646   PetscErrorCode ierr;
2647   PetscInt       i;
2648   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2649 
2650   PetscFunctionBegin;
2651   if (coloring->ctype == IS_COLORING_LOCAL) {
2652     ISColoringValue *allcolors,*colors;
2653     ISColoring      ocoloring;
2654 
2655     /* set coloring for diagonal portion */
2656     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2657 
2658     /* set coloring for off-diagonal portion */
2659     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2660     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2661     for (i=0; i<a->B->n; i++) {
2662       colors[i] = allcolors[a->garray[i]];
2663     }
2664     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2665     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2666     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2667     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2668   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2669     ISColoringValue *colors;
2670     PetscInt             *larray;
2671     ISColoring      ocoloring;
2672 
2673     /* set coloring for diagonal portion */
2674     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2675     for (i=0; i<a->A->n; i++) {
2676       larray[i] = i + a->cstart;
2677     }
2678     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2679     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2680     for (i=0; i<a->A->n; i++) {
2681       colors[i] = coloring->colors[larray[i]];
2682     }
2683     ierr = PetscFree(larray);CHKERRQ(ierr);
2684     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2685     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2686     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2687 
2688     /* set coloring for off-diagonal portion */
2689     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2690     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2691     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2692     for (i=0; i<a->B->n; i++) {
2693       colors[i] = coloring->colors[larray[i]];
2694     }
2695     ierr = PetscFree(larray);CHKERRQ(ierr);
2696     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2697     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2698     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2699   } else {
2700     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2701   }
2702 
2703   PetscFunctionReturn(0);
2704 }
2705 
2706 #undef __FUNCT__
2707 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2708 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2709 {
2710   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2711   PetscErrorCode ierr;
2712 
2713   PetscFunctionBegin;
2714   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2715   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2716   PetscFunctionReturn(0);
2717 }
2718 
2719 #undef __FUNCT__
2720 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2721 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2722 {
2723   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2724   PetscErrorCode ierr;
2725 
2726   PetscFunctionBegin;
2727   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2728   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2729   PetscFunctionReturn(0);
2730 }
2731 
2732 #undef __FUNCT__
2733 #define __FUNCT__ "MatMerge"
2734 /*@C
2735       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2736                  matrices from each processor
2737 
2738     Collective on MPI_Comm
2739 
2740    Input Parameters:
2741 +    comm - the communicator the parallel matrix will live on
2742 .    inmat - the input sequential matrix (one per processor)
2743 .    n - number of local columns (or PETSC_DECIDE)
2744 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2745 
2746    Output Parameter:
2747 .    outmat - the parallel matrix generated
2748 
2749     Level: advanced
2750 
2751    Notes: The number of columns of the matrix in EACH processor MUST be the same; the input matrix inmat is destroyed by this routine.
2752 
2753 @*/
2754 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2755 {
2756   PetscErrorCode    ierr;
2757   PetscInt          m,N,i,rstart,nnz,I,*dnz,*onz;
2758   const PetscInt    *indx;
2759   const PetscScalar *values;
2760   PetscMap          columnmap,rowmap;
2761 
2762   PetscFunctionBegin;
2763   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2764   /*
2765   PetscMPIInt       rank;
2766   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2767   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2768   */
2769   if (scall == MAT_INITIAL_MATRIX){
2770     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2771     if (n == PETSC_DECIDE){
2772       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2773       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2774       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2775       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2776       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2777     }
2778 
2779     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2780     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2781     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2782     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2783     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2784 
2785     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2786     for (i=0;i<m;i++) {
2787       ierr = MatGetRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2788       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2789       ierr = MatRestoreRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2790     }
2791     /* This routine will ONLY return MPIAIJ type matrix */
2792     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2793     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2794     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2795     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2796 
2797   } else if (scall == MAT_REUSE_MATRIX){
2798     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2799   } else {
2800     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2801   }
2802 
2803   for (i=0;i<m;i++) {
2804     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2805     I    = i + rstart;
2806     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2807     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2808   }
2809   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2810   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2811   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2812 
2813   PetscFunctionReturn(0);
2814 }
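/*
   A minimal usage sketch for MatMerge() (illustrative only; the variable
   names are hypothetical). The guarded block below is not compiled.
*/
#if 0
{
  Mat local,global;
  /* each process assembles "local" as a SeqAIJ matrix holding its strip of
     rows; every process must use the same number of columns */
  ierr = MatMerge(PETSC_COMM_WORLD,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&global);CHKERRQ(ierr);
  /* "local" is destroyed inside MatMerge(); only "global" remains */
  ierr = MatDestroy(global);CHKERRQ(ierr);
}
#endif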
2815 
2816 #undef __FUNCT__
2817 #define __FUNCT__ "MatFileSplit"
2818 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2819 {
2820   PetscErrorCode    ierr;
2821   PetscMPIInt       rank;
2822   PetscInt          m,N,i,rstart,nnz;
2823   size_t            len;
2824   const PetscInt    *indx;
2825   PetscViewer       out;
2826   char              *name;
2827   Mat               B;
2828   const PetscScalar *values;
2829 
2830   PetscFunctionBegin;
2831   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2832   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2833   /* Should this be the type of the diagonal block of A? */
2834   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2835   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2836   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2837   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2838   for (i=0;i<m;i++) {
2839     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2840     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2841     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2842   }
2843   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2844   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2845 
2846   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2847   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2848   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* leave room for ".<rank>" even on large communicators */
2849   sprintf(name,"%s.%d",outfile,rank);
2850   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2851   ierr = PetscFree(name);CHKERRQ(ierr);
2852   ierr = MatView(B,out);CHKERRQ(ierr);
2853   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2854   ierr = MatDestroy(B);CHKERRQ(ierr);
2855   PetscFunctionReturn(0);
2856 }
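/*
   Usage sketch for MatFileSplit() (illustrative only): each process writes
   its block of rows of the parallel matrix A to the binary file "mat.dat.<rank>".
*/
#if 0
  ierr = MatFileSplit(A,"mat.dat");CHKERRQ(ierr);
#endif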
2857 
2858 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2859 #undef __FUNCT__
2860 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2861 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2862 {
2863   PetscErrorCode       ierr;
2864   Mat_Merge_SeqsToMPI  *merge;
2865   PetscObjectContainer container;
2866 
2867   PetscFunctionBegin;
2868   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2869   if (container) {
2870     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2871     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2872     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2873     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2874     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2875     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2876     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2877     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2878     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2879     if (merge->ci){ierr = PetscFree(merge->ci);CHKERRQ(ierr);}
2880     if (merge->cj){ierr = PetscFree(merge->cj);CHKERRQ(ierr);}
2881     if (merge->C_seq){ierr = MatDestroy(merge->C_seq);CHKERRQ(ierr);}
2882 
2883     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2884     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2885     ierr = PetscFree(merge);CHKERRQ(ierr);
2886   }
2887 
2888   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2889   PetscFunctionReturn(0);
2890 }
2891 
2892 #include "src/mat/utils/freespace.h"
2893 #include "petscbt.h"
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatMerge_SeqsToMPI"
2896 /*@C
2897       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
2898                  matrices from each processor
2899 
2900     Collective on MPI_Comm
2901 
2902    Input Parameters:
2903 +    comm - the communicator the parallel matrix will live on
2904 .    seqmat - the input sequential matrix (one per processor)
2905 .    m - number of local rows (or PETSC_DECIDE)
2906 .    n - number of local columns (or PETSC_DECIDE)
2907 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2908 
2909    Output Parameter:
2910 .    mpimat - the parallel matrix generated
2911 
2912     Level: advanced
2913 
2914    Notes:
2915      The dimensions of the sequential matrix in each processor MUST be the same.
2916      The input seqmat is stored in the container composed on mpimat under the name "MatMergeSeqsToMPI", and will be
2917      destroyed when mpimat is destroyed. Call PetscObjectQuery() on mpimat to access seqmat.
2918 @*/
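/*
   Sketch of how a caller can retrieve the stored seqmat from mpimat through
   the container composed below under the name "MatMergeSeqsToMPI"
   (illustrative only, not compiled):
*/
#if 0
{
  PetscObjectContainer cont;
  Mat_Merge_SeqsToMPI  *merge;
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&cont);CHKERRQ(ierr);
  if (cont) {
    ierr = PetscObjectContainerGetPointer(cont,(void**)&merge);CHKERRQ(ierr);
    /* merge->C_seq is the sequential matrix that was merged into mpimat */
  }
}
#endif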
2919 static PetscEvent logkey_seqstompinum = 0;
2920 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2921 {
2922   PetscErrorCode       ierr;
2923   MPI_Comm             comm=mpimat->comm;
2924   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2925   PetscMPIInt          size,rank,taga,*len_s;
2926   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2927   PetscInt             proc,m;
2928   PetscInt             **buf_ri,**buf_rj;
2929   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2930   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2931   MPI_Request          *s_waits,*r_waits;
2932   MPI_Status           *status;
2933   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2934   Mat_Merge_SeqsToMPI  *merge;
2935   PetscObjectContainer container;
2936 
2937   PetscFunctionBegin;
2938   if (!logkey_seqstompinum) {
2939     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2940   }
2941   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2942 
2943   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2944   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2945 
2946   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2947   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()");
2948   ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2949 
2950   bi     = merge->bi;
2951   bj     = merge->bj;
2952   buf_ri = merge->buf_ri;
2953   buf_rj = merge->buf_rj;
2954 
2955   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2956   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2957   len_s  = merge->len_s;
2958 
2959   /* send and recv matrix values */
2960   /*-----------------------------*/
2961   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2962   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2963 
2964   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2965   for (proc=0,k=0; proc<size; proc++){
2966     if (!len_s[proc]) continue;
2967     i = owners[proc];
2968     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2969     k++;
2970   }
2971 
2972   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2973   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2974   ierr = PetscFree(status);CHKERRQ(ierr);
2975 
2976   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2977   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2978 
2979   /* insert mat values of mpimat */
2980   /*----------------------------*/
2981   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2982   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2983   nextrow = buf_ri_k + merge->nrecv;
2984   nextai  = nextrow + merge->nrecv;
2985 
2986   for (k=0; k<merge->nrecv; k++){
2987     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
2988     nrows = *(buf_ri_k[k]);
2989     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
2990     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
2991   }
2992 
2993   /* set values of ba */
2994   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2995   for (i=0; i<m; i++) {
2996     arow = owners[rank] + i;
2997     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2998     bnzi = bi[i+1] - bi[i];
2999     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3000 
3001     /* add local non-zero vals of this proc's seqmat into ba */
3002     anzi = ai[arow+1] - ai[arow];
3003     aj   = a->j + ai[arow];
3004     aa   = a->a + ai[arow];
3005     nextaj = 0;
3006     for (j=0; nextaj<anzi; j++){
3007       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3008         ba_i[j] += aa[nextaj++];
3009       }
3010     }
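    /* e.g. (illustrative): with bj_i = {1,4,7,9} and this row of seqmat having
       aj = {4,9}, aa = {x,y}, the scan above leaves ba_i = {0,x,0,y} */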
3011 
3012     /* add received vals into ba */
3013     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3014       /* i-th row */
3015       if (i == *nextrow[k]) {
3016         anzi = *(nextai[k]+1) - *nextai[k];
3017         aj   = buf_rj[k] + *(nextai[k]);
3018         aa   = abuf_r[k] + *(nextai[k]);
3019         nextaj = 0;
3020         for (j=0; nextaj<anzi; j++){
3021           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3022             ba_i[j] += aa[nextaj++];
3023           }
3024         }
3025         nextrow[k]++; nextai[k]++;
3026       }
3027     }
3028     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3029   }
3030   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3031   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3032 
3033   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3034   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3035   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3036   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3037   PetscFunctionReturn(0);
3038 }
3039 static PetscEvent logkey_seqstompisym = 0;
3040 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3041 {
3042   PetscErrorCode       ierr;
3043   Mat                  B_mpi;
3044   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3045   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3046   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3047   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3048   PetscInt             len,proc,*dnz,*onz;
3049   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3050   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3051   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3052   MPI_Status           *status;
3053   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3054   PetscBT              lnkbt;
3055   Mat_Merge_SeqsToMPI  *merge;
3056   PetscObjectContainer container;
3057 
3058   PetscFunctionBegin;
3059   if (!logkey_seqstompisym) {
3060     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3061   }
3062   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3063 
3064   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3065   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3066 
3067   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3068   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3069 
3070   /* determine row ownership */
3071   /*---------------------------------------------------------*/
3072   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3073   if (m == PETSC_DECIDE) {
3074     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3075   } else {
3076     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3077   }
3078   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3079   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3080   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3081 
3082   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3083   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3084 
3085   /* determine the number of messages to send, their lengths */
3086   /*---------------------------------------------------------*/
3087   len_s  = merge->len_s;
3088 
3089   len = 0;  /* length of buf_si[] */
3090   merge->nsend = 0;
3091   for (proc=0; proc<size; proc++){
3092     len_si[proc] = 0;
3093     if (proc == rank){
3094       len_s[proc] = 0;
3095     } else {
3096       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3097       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
3098     }
3099     if (len_s[proc]) {
3100       merge->nsend++;
3101       nrows = 0;
3102       for (i=owners[proc]; i<owners[proc+1]; i++){
3103         if (ai[i+1] > ai[i]) nrows++;
3104       }
3105       len_si[proc] = 2*(nrows+1);
3106       len += len_si[proc];
3107     }
3108   }
3109 
3110   /* determine the number and length of messages to receive for ij-structure */
3111   /*-------------------------------------------------------------------------*/
3112   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3113   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3114 
3115   /* post the Irecv of j-structure */
3116   /*-------------------------------*/
3117   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3118   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3119 
3120   /* post the Isend of j-structure */
3121   /*--------------------------------*/
3122   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3123   sj_waits = si_waits + merge->nsend;
3124 
3125   for (proc=0, k=0; proc<size; proc++){
3126     if (!len_s[proc]) continue;
3127     i = owners[proc];
3128     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3129     k++;
3130   }
3131 
3132   /* receives and sends of j-structure are complete */
3133   /*------------------------------------------------*/
3134   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3135   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3136 
3137   /* send and recv i-structure */
3138   /*---------------------------*/
3139   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3140   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3141 
3142   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3143   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3144   for (proc=0,k=0; proc<size; proc++){
3145     if (!len_s[proc]) continue;
3146     /* form outgoing message for i-structure:
3147          buf_si[0]:                 nrows to be sent
3148                [1:nrows]:           row index (local to the receiving [proc])
3149                [nrows+1:2*nrows+1]: i-structure index
3150     */
3151     /*-------------------------------------------*/
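    /* e.g. (illustrative): two nonempty rows for [proc], local rows 2 and 5 with
       3 and 4 nonzeros respectively, give buf_si = {2, 2,5, 0,3,7} */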
3152     nrows = len_si[proc]/2 - 1;
3153     buf_si_i    = buf_si + nrows+1;
3154     buf_si[0]   = nrows;
3155     buf_si_i[0] = 0;
3156     nrows = 0;
3157     for (i=owners[proc]; i<owners[proc+1]; i++){
3158       anzi = ai[i+1] - ai[i];
3159       if (anzi) {
3160         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3161         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3162         nrows++;
3163       }
3164     }
3165     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3166     k++;
3167     buf_si += len_si[proc];
3168   }
3169 
3170   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3171   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3172 
3173   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3174   for (i=0; i<merge->nrecv; i++){
3175     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3176   }
3177 
3178   ierr = PetscFree(len_si);CHKERRQ(ierr);
3179   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3180   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3181   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3182   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3183   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3184   ierr = PetscFree(status);CHKERRQ(ierr);
3185 
3186   /* compute a local seq matrix in each processor */
3187   /*----------------------------------------------*/
3188   /* allocate bi array and free space for accumulating nonzero column info */
3189   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3190   bi[0] = 0;
3191 
3192   /* create and initialize a linked list */
3193   nlnk = N+1;
3194   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3195 
3196   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3197   len = 0;
3198   len  = ai[owners[rank+1]] - ai[owners[rank]];
3199   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3200   current_space = free_space;
3201 
3202   /* determine symbolic info for each local row */
3203   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3204   nextrow = buf_ri_k + merge->nrecv;
3205   nextai  = nextrow + merge->nrecv;
3206   for (k=0; k<merge->nrecv; k++){
3207     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3208     nrows = *buf_ri_k[k];
3209     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3210     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
3211   }
3212 
3213   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3214   len = 0;
3215   for (i=0;i<m;i++) {
3216     bnzi   = 0;
3217     /* add local non-zero cols of this proc's seqmat into lnk */
3218     arow   = owners[rank] + i;
3219     anzi   = ai[arow+1] - ai[arow];
3220     aj     = a->j + ai[arow];
3221     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3222     bnzi += nlnk;
3223     /* add received col data into lnk */
3224     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3225       if (i == *nextrow[k]) { /* i-th row */
3226         anzi = *(nextai[k]+1) - *nextai[k];
3227         aj   = buf_rj[k] + *nextai[k];
3228         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3229         bnzi += nlnk;
3230         nextrow[k]++; nextai[k]++;
3231       }
3232     }
3233     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3234 
3235     /* if free space is not available, make more free space */
3236     if (current_space->local_remaining<bnzi) {
3237       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3238       nspacedouble++;
3239     }
3240     /* copy data into free space, then initialize lnk */
3241     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3242     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3243 
3244     current_space->array           += bnzi;
3245     current_space->local_used      += bnzi;
3246     current_space->local_remaining -= bnzi;
3247 
3248     bi[i+1] = bi[i] + bnzi;
3249   }
3250 
3251   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3252 
3253   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3254   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3255   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3256 
3257   /* create symbolic parallel matrix B_mpi */
3258   /*---------------------------------------*/
3259   if (n==PETSC_DECIDE) {
3260     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3261   } else {
3262     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3263   }
3264   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3265   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3266   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3267 
3268   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3269   B_mpi->assembled     = PETSC_FALSE;
3270   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3271   merge->bi            = bi;
3272   merge->bj            = bj;
3273   merge->buf_ri        = buf_ri;
3274   merge->buf_rj        = buf_rj;
3275   merge->C_seq         = seqmat;
3276   merge->ci            = PETSC_NULL;
3277   merge->cj            = PETSC_NULL;
3278 
3279   /* attach the supporting struct to B_mpi for reuse */
3280   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3281   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3282   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3283   *mpimat = B_mpi;
3284   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3285   PetscFunctionReturn(0);
3286 }
3287 
3288 static PetscEvent logkey_seqstompi = 0;
3289 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3290 {
3291   PetscErrorCode   ierr;
3292 
3293   PetscFunctionBegin;
3294   if (!logkey_seqstompi) {
3295     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3296   }
3297   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3298   if (scall == MAT_INITIAL_MATRIX){
3299     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3300   }
3301   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3302   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3303   PetscFunctionReturn(0);
3304 }
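/*
   Intended calling pattern (illustrative sketch, not compiled): the symbolic
   phase is done once with MAT_INITIAL_MATRIX; later calls with MAT_REUSE_MATRIX
   only redo the numeric phase when the values (not the nonzero pattern) of
   seqmat change.
*/
#if 0
{
  Mat seqmat,mpimat;
  /* each process assembles seqmat; the dimensions must be identical on every process */
  ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
  /* ... update the numerical values of seqmat ... */
  ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
  ierr = MatDestroy(mpimat);CHKERRQ(ierr); /* also destroys the attached seqmat */
}
#endif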
3305 static PetscEvent logkey_getlocalmat = 0;
3306 #undef __FUNCT__
3307 #define __FUNCT__ "MatGetLocalMat"
3308 /*@C
3309      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A
3310 
3311     Not Collective
3312 
3313    Input Parameters:
3314 +    A - the matrix
3315 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3316 
3317    Output Parameter:
3318 .    A_loc - the local sequential matrix generated
3319 
3320     Level: developer
3321 
3322 @*/
3323 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3324 {
3325   PetscErrorCode  ierr;
3326   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3327   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3328   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3329   MatScalar       *aa=a->a,*ba=b->a;
3330   PetscInt        am=A->m,i,j,*nnz,nnz_max,ncols,*cols,cstart=mpimat->cstart;
3331   Mat             Aw;
3332 
3333   PetscFunctionBegin;
3334   if (!logkey_getlocalmat) {
3335     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3336   }
3337   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3338   if (scall == MAT_INITIAL_MATRIX){
3339     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
3340     nnz_max = 0;
3341     for (i=0; i<am; i++){
3342       nnz[i] = (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3343       if (nnz[i] > nnz_max) nnz_max = nnz[i];
3344     }
3345     ierr = MatCreate(PETSC_COMM_SELF,am,A->N,am,A->N,&Aw);CHKERRQ(ierr);
3346     ierr = MatSetType(Aw,MATSEQAIJ);CHKERRQ(ierr);
3347     ierr = MatSeqAIJSetPreallocation(Aw,0,nnz);CHKERRQ(ierr);
3348   } else if (scall == MAT_REUSE_MATRIX){
3349     Aw = *A_loc;
3350     mat=(Mat_SeqAIJ*)Aw->data;
3351     nnz_max = 0;
3352     for (i=0; i<am; i++){
3353       j = mat->i[i+1] - mat->i[i];
3354       if (j > nnz_max) nnz_max = j;
3355     }
3356     /* clear old values in *A_loc */
3357     ierr = PetscMemzero(mat->a,mat->i[am]*sizeof(MatScalar));CHKERRQ(ierr);
3358   } else {
3359     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3360   }
3361   ierr = PetscMalloc((1+nnz_max)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3362 
3363   for (i=0; i<am; i++) {
3364     /* diagonal portion of A */
3365     ncols = ai[i+1] - ai[i];
3366     for (j=0; j<ncols; j++) {cols[j] = cstart + *aj++;}
3367     ierr = MatSetValues(Aw,1,&i,ncols,cols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
3368     /* off-diagonal portion of A */
3369     ncols = bi[i+1] - bi[i];
3370     for (j=0; j<ncols; j++) cols[j] = cmap[*bj++]; /* cmap[*(bj+bi[i]+j)]; */
3371     ierr = MatSetValues(Aw,1,&i,ncols,cols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
3372   }
3373   ierr = MatAssemblyBegin(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3374   ierr = MatAssemblyEnd(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3375   ierr = PetscFree(cols);CHKERRQ(ierr);
3376   if (scall == MAT_INITIAL_MATRIX){
3377     ierr = PetscFree(nnz);CHKERRQ(ierr);
3378     *A_loc = Aw;
3379   }
3380 
3381   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3382   PetscFunctionReturn(0);
3383 }
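/*
   Usage sketch for MatGetLocalMat() (illustrative only, not compiled): build
   the local matrix once, then refresh its values with MAT_REUSE_MATRIX when
   A's values change but its nonzero pattern does not.
*/
#if 0
{
  Mat A_loc;
  ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
  /* ... A's numerical values change ... */
  ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
  ierr = MatDestroy(A_loc);CHKERRQ(ierr);
}
#endif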
3384 
3385 static PetscEvent logkey_getlocalmatcondensed = 0;
3386 #undef __FUNCT__
3387 #define __FUNCT__ "MatGetLocalMatCondensed"
3388 /*@C
3389      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A, keeping only the NON-ZERO columns
3390 
3391     Not Collective
3392 
3393    Input Parameters:
3394 +    A - the matrix
3395 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3396 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3397 
3398    Output Parameter:
3399 .    A_loc - the local sequential matrix generated
3400 
3401     Level: developer
3402 
3403 @*/
3404 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3405 {
3406   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3407   PetscErrorCode    ierr;
3408   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3409   IS                isrowa,iscola;
3410   Mat               *aloc;
3411 
3412   PetscFunctionBegin;
3413   if (!logkey_getlocalmatcondensed) {
3414     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3415   }
3416   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3417   if (!row){
3418     start = a->rstart; end = a->rend;
3419     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3420   } else {
3421     isrowa = *row;
3422   }
3423   if (!col){
3424     start = a->cstart;
3425     cmap  = a->garray;
3426     nzA   = a->A->n;
3427     nzB   = a->B->n;
3428     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3429     ncols = 0;
3430     for (i=0; i<nzB; i++) {
3431       if (cmap[i] < start) idx[ncols++] = cmap[i];
3432       else break;
3433     }
3434     imark = i;
3435     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3436     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3437     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3438     ierr = PetscFree(idx);CHKERRQ(ierr);
3439   } else {
3440     iscola = *col;
3441   }
3442   if (scall != MAT_INITIAL_MATRIX){
3443     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3444     aloc[0] = *A_loc;
3445   }
3446   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3447   *A_loc = aloc[0];
3448   ierr = PetscFree(aloc);CHKERRQ(ierr);
3449   if (!row){
3450     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3451   }
3452   if (!col){
3453     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3454   }
3455   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3456   PetscFunctionReturn(0);
3457 }
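/*
   Usage sketch for MatGetLocalMatCondensed() (illustrative only, not compiled):
   passing PETSC_NULL for row and col lets the routine build the condensed
   column set from A's own nonzero structure.
*/
#if 0
{
  Mat A_loc;
  ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
  ierr = MatDestroy(A_loc);CHKERRQ(ierr);
}
#endif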
3458 
3459 static PetscEvent logkey_GetBrowsOfAcols = 0;
3460 #undef __FUNCT__
3461 #define __FUNCT__ "MatGetBrowsOfAcols"
3462 /*@C
3463     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of local A
3464 
3465     Collective on Mat
3466 
3467    Input Parameters:
3468 +    A,B - the matrices in mpiaij format
3469 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3470 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3471 
3472    Output Parameter:
3473 +    rowb, colb - index sets of rows and columns of B to extract
3474 .    brstart - row index of B_seq at which the B->m rows taken from B's own local rows begin
3475 -    B_seq - the sequential matrix generated
3476 
3477     Level: developer
3478 
3479 @*/
3480 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3481 {
3482   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3483   PetscErrorCode    ierr;
3484   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3485   IS                isrowb,iscolb;
3486   Mat               *bseq;
3487 
3488   PetscFunctionBegin;
3489   if (a->cstart != b->rstart || a->cend != b->rend){
3490     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3491   }
3492   if (!logkey_GetBrowsOfAcols) {
3493     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3494   }
3495   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3496 
3497   if (scall == MAT_INITIAL_MATRIX){
3498     start = a->cstart;
3499     cmap  = a->garray;
3500     nzA   = a->A->n;
3501     nzB   = a->B->n;
3502     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3503     ncols = 0;
3504     for (i=0; i<nzB; i++) {  /* row < local row index */
3505       if (cmap[i] < start) idx[ncols++] = cmap[i];
3506       else break;
3507     }
3508     imark = i;
3509     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3510     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3511     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3512     ierr = PetscFree(idx);CHKERRQ(ierr);
3513     *brstart = imark;
3514     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3515   } else {
3516     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3517     isrowb = *rowb; iscolb = *colb;
3518     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3519     bseq[0] = *B_seq;
3520   }
3521   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3522   *B_seq = bseq[0];
3523   ierr = PetscFree(bseq);CHKERRQ(ierr);
3524   if (!rowb){
3525     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3526   } else {
3527     *rowb = isrowb;
3528   }
3529   if (!colb){
3530     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3531   } else {
3532     *colb = iscolb;
3533   }
3534   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3535   PetscFunctionReturn(0);
3536 }
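/*
   Usage sketch for MatGetBrowsOfAcols() (illustrative only, not compiled): keep
   the index sets returned by the MAT_INITIAL_MATRIX call so that later
   MAT_REUSE_MATRIX calls can reuse them.
*/
#if 0
{
  Mat      B_seq;
  IS       rowb,colb;
  PetscInt brstart;
  ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
  /* ... B's numerical values change ... */
  ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
  ierr = ISDestroy(rowb);CHKERRQ(ierr);
  ierr = ISDestroy(colb);CHKERRQ(ierr);
  ierr = MatDestroy(B_seq);CHKERRQ(ierr);
}
#endif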
3537 
3538 static PetscEvent logkey_GetBrowsOfAocols = 0;
3539 #undef __FUNCT__
3540 #define __FUNCT__ "MatGetBrowsOfAoCols"
3541 /*@C
3542     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
3543     of the OFF-DIAGONAL portion of local A
3544 
3545     Collective on Mat
3546 
3547    Input Parameters:
3548 +    A,B - the matrices in mpiaij format
3549 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3550 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3551 
3552    Output Parameter:
3553 +    rowb, colb - index sets of rows and columns of B to extract
3554 .    brstart - row index of B_seq at which the remaining rows (those corresponding to A's off-diagonal columns at or beyond cend) begin
3555 -    B_seq - the sequential matrix generated
3556 
3557     Level: developer
3558 
3559 @*/
3560 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3561 {
3562   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3563   PetscErrorCode    ierr;
3564   PetscInt          *idx,i,start,ncols,nzB,*cmap;
3565   IS                isrowb,iscolb;
3566   Mat               *bseq;
3567 
3568   PetscFunctionBegin;
3569   if (a->cstart != b->rstart || a->cend != b->rend){
3570     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3571   }
3572   if (!logkey_GetBrowsOfAocols) {
3573     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrowsOfAoCols",MAT_COOKIE);CHKERRQ(ierr);
3574   }
3575   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3576 
3577   if (scall == MAT_INITIAL_MATRIX){
3578     start = a->cstart;
3579     cmap  = a->garray;
3580     nzB   = a->B->n;
3581     if (!nzB){ /* output a null matrix */
3582       *B_seq = PETSC_NULL;
3583       ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3584       PetscFunctionReturn(0);
3585     }
3586     ierr  = PetscMalloc(nzB*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3587     ncols = 0;
3588     *brstart = nzB;
3589     for (i=0; i<nzB; i++) {  /* row < local row index */
3590       if (cmap[i] < start) idx[ncols++] = cmap[i];
3591       else break;
3592     }
3593     *brstart = i;
3594     for (i=*brstart; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3595     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3596     ierr = PetscFree(idx);CHKERRQ(ierr);
3597 
3598     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3599   } else {
3600     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3601     isrowb = *rowb; iscolb = *colb;
3602     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3603     bseq[0] = *B_seq;
3604   }
3605   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3606   *B_seq = bseq[0];
3607   ierr = PetscFree(bseq);CHKERRQ(ierr);
3608   if (!rowb){
3609     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3610   } else {
3611     *rowb = isrowb;
3612   }
3613   if (!colb){
3614     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3615   } else {
3616     *colb = iscolb;
3617   }
3618   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3619   PetscFunctionReturn(0);
3620 }
3621