xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision aac34f1303709ac0de79fa856dd6d448cc462e90)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7   number to the local number in the off-diagonal part of the local
8   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9   a slightly higher hash table cost; without it, it is not scalable (each
10   processor has an order N integer array) but is faster to access.
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
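/*
   For illustration only (a sketch, not compiled code): once the colmap has been
   built, a global column number gcol is translated into its local position lcol in
   the off-diagonal block B exactly as MatSetValues_MPIAIJ() does below.  The +1/-1
   shifts let a stored value of 0 mean "column not present", so lcol == -1 after the
   lookup means gcol does not occur in B on this processor.

   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/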
34 
35 #define CHUNKSIZE   15
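/*
   The two macros below insert one value into the diagonal block (A) or the
   off-diagonal block (B) of the local storage.  Both follow the same pattern: a
   short binary search narrows the row to a window of at most 5 slots, a linear scan
   locates the column (adding or overwriting if it already exists), and if the row
   is full the CSR arrays i/j/a are reallocated with CHUNKSIZE extra slots for this
   row before the later entries are shifted up and the new entry is stored in sorted
   position.  The nonew flag controls whether new nonzeros are silently dropped (1),
   generate an error (-1), or only new allocations generate an error (-2).
*/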
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (PetscInt*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (PetscInt*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar    value;
190   PetscErrorCode ierr;
191   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
192   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth     roworiented = aij->roworiented;
194 
195   /* Some variables required by the macros */
196   Mat            A = aij->A;
197   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
198   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar    *aa = a->a;
200   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat            B = aij->B;
202   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
203   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar    *ba = b->a;
205 
206   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   PetscInt       nonew = a->nonew,shift=0;
208   PetscScalar    *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
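/*
   Typical use from application code, through the public MatSetValues() interface
   (a sketch with made-up indices; matrix creation and error checking omitted):
   values are given with global indices, entries for rows owned by other processors
   are stashed, and the stash is flushed by the assembly routines below.

     PetscInt    row = 0, cols[2] = {0,1};
     PetscScalar vals[2] = {2.0,-1.0};
     ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/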
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
278   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   PetscInt       nstash,reallocs;
322   InsertMode     addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333   }
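  /* a worked example of the reduction above (assuming the usual enum values
     INSERT_VALUES == 1 and ADD_VALUES == 2): OR-ing identical modes across
     processors returns that mode, while mixing them gives 1|2 == 3 ==
     (ADD_VALUES|INSERT_VALUES), which is exactly what the check above detects */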
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
348   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
349   PetscErrorCode ierr;
350   PetscMPIInt    n;
351   PetscInt       i,j,rstart,ncols,flg;
352   PetscInt       *row,*col,other_disassembled;
353   PetscScalar    *val;
354   InsertMode     addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for (i=0; i<n;) {
363         /* Now identify the consecutive vals belonging to the same row */
364         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
373   }
374 
375   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled; if so we must
379      also disassemble ourselves, in order that we may reassemble. */
380   /*
381      if nonzero structure of submatrix B cannot change then we know that
382      no processor disassembled thus we can skip this stuff
383   */
384   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
388       /* reaccess the b because aij->B was changed */
389       b    = (Mat_SeqAIJ *)aij->B->data;
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
397   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
398 
399   if (aij->rowvalues) {
400     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
401     aij->rowvalues = 0;
402   }
403 
404   /* used by MatAXPY() */
405   a->xtoy = 0; b->xtoy = 0;
406   a->XtoY = 0; b->XtoY = 0;
407 
408   PetscFunctionReturn(0);
409 }
410 
411 #undef __FUNCT__
412 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
413 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
414 {
415   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
416   PetscErrorCode ierr;
417 
418   PetscFunctionBegin;
419   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
420   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatZeroRows_MPIAIJ"
426 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
427 {
428   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
429   PetscErrorCode ierr;
430   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
431   PetscInt       i,N,*rows,*owners = l->rowners;
432   PetscInt       *nprocs,j,idx,nsends,row;
433   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
434   PetscInt       *rvalues,count,base,slen,*source;
435   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
436   MPI_Comm       comm = A->comm;
437   MPI_Request    *send_waits,*recv_waits;
438   MPI_Status     recv_status,*send_status;
439   IS             istmp;
440   PetscTruth     found;
441 
442   PetscFunctionBegin;
443   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
444   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
445 
446   /*  first count number of contributors to each processor */
447   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
448   ierr   = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
450   for (i=0; i<N; i++) {
451     idx = rows[i];
452     found = PETSC_FALSE;
453     for (j=0; j<size; j++) {
454       if (idx >= owners[j] && idx < owners[j+1]) {
455         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
456       }
457     }
458     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
459   }
460   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
461 
462   /* inform other processors of number of messages and max length*/
463   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
464 
465   /* post receives:   */
466   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
467   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
468   for (i=0; i<nrecvs; i++) {
469     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
470   }
471 
472   /* do sends:
473       1) starts[i] gives the starting index in svalues for stuff going to
474          the ith processor
475   */
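  /*
     a small worked example (made-up counts): if this processor sends 3 rows to
     processor 0, none to processor 1 and 2 to processor 2, then the per-processor
     counts are {3,0,2} and the loop below produces starts = {0,3,3}, so the rows
     destined for each processor are packed contiguously into svalues
  */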
476   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
477   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
478   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
479   starts[0] = 0;
480   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
481   for (i=0; i<N; i++) {
482     svalues[starts[owner[i]]++] = rows[i];
483   }
484   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
485 
486   starts[0] = 0;
487   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
488   count = 0;
489   for (i=0; i<size; i++) {
490     if (nprocs[2*i+1]) {
491       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
492     }
493   }
494   ierr = PetscFree(starts);CHKERRQ(ierr);
495 
496   base = owners[rank];
497 
498   /*  wait on receives */
499   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
500   source = lens + nrecvs;
501   count  = nrecvs; slen = 0;
502   while (count) {
503     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
504     /* unpack receives into our local space */
505     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
506     source[imdex]  = recv_status.MPI_SOURCE;
507     lens[imdex]    = n;
508     slen          += n;
509     count--;
510   }
511   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
512   /* move the received data into the list of local rows */
513   /* move the data into the send scatter */
514   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
515   count = 0;
516   for (i=0; i<nrecvs; i++) {
517     values = rvalues + i*nmax;
518     for (j=0; j<lens[i]; j++) {
519       lrows[count++] = values[j] - base;
520     }
521   }
522   ierr = PetscFree(rvalues);CHKERRQ(ierr);
523   ierr = PetscFree(lens);CHKERRQ(ierr);
524   ierr = PetscFree(owner);CHKERRQ(ierr);
525   ierr = PetscFree(nprocs);CHKERRQ(ierr);
526 
527   /* actually zap the local rows */
528   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
529   PetscLogObjectParent(A,istmp);
530 
531   /*
532         Zero the required rows. If the "diagonal block" of the matrix
533      is square and the user wishes to set the diagonal we use separate
534      code so that MatSetValues() is not called for each diagonal entry,
535      which would allocate new memory, causing lots of mallocs and slowing things down.
536 
537        Contributed by: Mathew Knepley
538   */
539   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
540   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
541   if (diag && (l->A->M == l->A->N)) {
542     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
543   } else if (diag) {
544     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
545     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
546       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
547 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
548     }
549     for (i = 0; i < slen; i++) {
550       row  = lrows[i] + rstart;
551       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
552     }
553     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
555   } else {
556     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
557   }
558   ierr = ISDestroy(istmp);CHKERRQ(ierr);
559   ierr = PetscFree(lrows);CHKERRQ(ierr);
560 
561   /* wait on sends */
562   if (nsends) {
563     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
564     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
565     ierr = PetscFree(send_status);CHKERRQ(ierr);
566   }
567   ierr = PetscFree(send_waits);CHKERRQ(ierr);
568   ierr = PetscFree(svalues);CHKERRQ(ierr);
569 
570   PetscFunctionReturn(0);
571 }
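/*
   Usage sketch (hypothetical row numbers; error checking omitted): every processor
   supplies an index set of global row numbers, which need not be locally owned
   since the routine above forwards them to their owners.

     IS          rows;
     PetscInt    idx[2] = {0,5};
     PetscScalar diag = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,idx,&rows);CHKERRQ(ierr);
     ierr = MatZeroRows(A,rows,&diag);CHKERRQ(ierr);
     ierr = ISDestroy(rows);CHKERRQ(ierr);
*/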
572 
573 #undef __FUNCT__
574 #define __FUNCT__ "MatMult_MPIAIJ"
575 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
576 {
577   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
578   PetscErrorCode ierr;
579   PetscInt       nt;
580 
581   PetscFunctionBegin;
582   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
583   if (nt != A->n) {
584     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
585   }
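  /* start gathering the off-processor entries of xx into a->lvec, overlap that
     communication with the diagonal-block product, then finish the scatter and
     accumulate the off-diagonal contribution: yy = A*xx + B*lvec */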
586   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
588   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
589   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
590   PetscFunctionReturn(0);
591 }
592 
593 #undef __FUNCT__
594 #define __FUNCT__ "MatMultAdd_MPIAIJ"
595 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
596 {
597   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
598   PetscErrorCode ierr;
599 
600   PetscFunctionBegin;
601   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
603   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
604   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 #undef __FUNCT__
609 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
610 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
611 {
612   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
613   PetscErrorCode ierr;
614 
615   PetscFunctionBegin;
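  /* the transpose product is split as yy = (diagonal block)'*xx plus the
     contributions of (off-diagonal block)'*xx, which are scattered back with
     ADD_VALUES to the processors that own the corresponding rows of yy */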
616   /* do nondiagonal part */
617   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
618   /* send it on its way */
619   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
620   /* do local part */
621   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
622   /* receive remote parts: note this assumes the values are not actually */
623   /* inserted in yy until the next line, which is true for my implementation */
624   /* but is perhaps not always true. */
625   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
626   PetscFunctionReturn(0);
627 }
628 
629 EXTERN_C_BEGIN
630 #undef __FUNCT__
631 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
632 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f)
633 {
634   MPI_Comm       comm;
635   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
636   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
637   IS             Me,Notme;
638   PetscErrorCode ierr;
639   PetscInt       M,N,first,last,*notme,i;
640   PetscMPIInt    size;
641 
642   PetscFunctionBegin;
643 
644   /* Easy test: symmetric diagonal block */
645   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
646   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
647   if (!*f) PetscFunctionReturn(0);
648   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
649   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
650   if (size == 1) PetscFunctionReturn(0);
651 
652   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
653   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
654   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
655   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
656   for (i=0; i<first; i++) notme[i] = i;
657   for (i=last; i<M; i++) notme[i-last+first] = i;
658   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
659   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
660   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
661   Aoff = Aoffs[0];
662   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
663   Boff = Boffs[0];
664   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
665   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
666   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
667   ierr = ISDestroy(Me);CHKERRQ(ierr);
668   ierr = ISDestroy(Notme);CHKERRQ(ierr);
669 
670   PetscFunctionReturn(0);
671 }
672 EXTERN_C_END
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
676 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
677 {
678   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
679   PetscErrorCode ierr;
680 
681   PetscFunctionBegin;
682   /* do nondiagonal part */
683   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
684   /* send it on its way */
685   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
686   /* do local part */
687   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
688   /* receive remote parts: note this assumes the values are not actually */
689   /* inserted in zz until the next line, which is true for my implementation */
690   /* but is perhaps not always true. */
691   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 /*
696   This only works correctly for square matrices where the subblock A->A is the
697    diagonal block
698 */
699 #undef __FUNCT__
700 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
701 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
702 {
703   PetscErrorCode ierr;
704   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
705 
706   PetscFunctionBegin;
707   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
708   if (a->rstart != a->cstart || a->rend != a->cend) {
709     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
710   }
711   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatScale_MPIAIJ"
717 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
718 {
719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
720   PetscErrorCode ierr;
721 
722   PetscFunctionBegin;
723   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
724   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatDestroy_MPIAIJ"
730 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
731 {
732   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
733   PetscErrorCode ierr;
734 
735   PetscFunctionBegin;
736 #if defined(PETSC_USE_LOG)
737   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
738 #endif
739   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
740   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
741   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
742   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
743 #if defined (PETSC_USE_CTABLE)
744   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
745 #else
746   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
747 #endif
748   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
749   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
750   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
751   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
752   ierr = PetscFree(aij);CHKERRQ(ierr);
753 
754   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
755   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
756   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
757   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
758   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
759   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
760   PetscFunctionReturn(0);
761 }
762 
763 #undef __FUNCT__
764 #define __FUNCT__ "MatView_MPIAIJ_Binary"
765 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
766 {
767   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
768   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
769   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
770   PetscErrorCode    ierr;
771   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
772   int               fd;
773   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
774   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
775   PetscScalar       *column_values;
776 
777   PetscFunctionBegin;
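  /* the binary file produced below is written entirely by process 0 and has the
     layout [MAT_FILE_COOKIE, M, N, global nz] [row lengths for all rows]
     [global column indices, row by row] [values, row by row]; every other process
     ships its row lengths, column indices and values to process 0 in turn */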
778   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
779   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
780   nz   = A->nz + B->nz;
781   if (!rank) {
782     header[0] = MAT_FILE_COOKIE;
783     header[1] = mat->M;
784     header[2] = mat->N;
785     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
786     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
787     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
788     /* get largest number of rows any processor has */
789     rlen = mat->m;
790     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
791     for (i=1; i<size; i++) {
792       rlen = PetscMax(rlen,range[i+1] - range[i]);
793     }
794   } else {
795     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
796     rlen = mat->m;
797   }
798 
799   /* load up the local row counts */
800   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
801   for (i=0; i<mat->m; i++) {
802     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
803   }
804 
805   /* store the row lengths to the file */
806   if (!rank) {
807     MPI_Status status;
808     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
809     for (i=1; i<size; i++) {
810       rlen = range[i+1] - range[i];
811       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
812       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
813     }
814   } else {
815     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
816   }
817   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
818 
819   /* load up the local column indices */
820   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
821   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
822   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
823   cnt  = 0;
824   for (i=0; i<mat->m; i++) {
825     for (j=B->i[i]; j<B->i[i+1]; j++) {
826       if ( (col = garray[B->j[j]]) > cstart) break;
827       column_indices[cnt++] = col;
828     }
829     for (k=A->i[i]; k<A->i[i+1]; k++) {
830       column_indices[cnt++] = A->j[k] + cstart;
831     }
832     for (; j<B->i[i+1]; j++) {
833       column_indices[cnt++] = garray[B->j[j]];
834     }
835   }
836   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
837 
838   /* store the column indices to the file */
839   if (!rank) {
840     MPI_Status status;
841     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
842     for (i=1; i<size; i++) {
843       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
844       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
845       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
846       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
847     }
848   } else {
849     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
850     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
851   }
852   ierr = PetscFree(column_indices);CHKERRQ(ierr);
853 
854   /* load up the local column values */
855   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
856   cnt  = 0;
857   for (i=0; i<mat->m; i++) {
858     for (j=B->i[i]; j<B->i[i+1]; j++) {
859       if ( garray[B->j[j]] > cstart) break;
860       column_values[cnt++] = B->a[j];
861     }
862     for (k=A->i[i]; k<A->i[i+1]; k++) {
863       column_values[cnt++] = A->a[k];
864     }
865     for (; j<B->i[i+1]; j++) {
866       column_values[cnt++] = B->a[j];
867     }
868   }
869   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
870 
871   /* store the column values to the file */
872   if (!rank) {
873     MPI_Status status;
874     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
875     for (i=1; i<size; i++) {
876       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
877       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
878       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
879       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
880     }
881   } else {
882     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
883     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
884   }
885   ierr = PetscFree(column_values);CHKERRQ(ierr);
886   PetscFunctionReturn(0);
887 }
888 
889 #undef __FUNCT__
890 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
891 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
892 {
893   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
894   PetscErrorCode    ierr;
895   PetscMPIInt       rank = aij->rank,size = aij->size;
896   PetscTruth        isdraw,iascii,flg,isbinary;
897   PetscViewer       sviewer;
898   PetscViewerFormat format;
899 
900   PetscFunctionBegin;
901   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
902   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
903   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
904   if (iascii) {
905     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
906     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
907       MatInfo info;
908       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
909       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
910       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
911       if (flg) {
912         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
913 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
914       } else {
915         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
916 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
917       }
918       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
919       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
920       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
921       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
922       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
923       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
924       PetscFunctionReturn(0);
925     } else if (format == PETSC_VIEWER_ASCII_INFO) {
926       PetscFunctionReturn(0);
927     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
928       PetscFunctionReturn(0);
929     }
930   } else if (isbinary) {
931     if (size == 1) {
932       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
933       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
934     } else {
935       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
936     }
937     PetscFunctionReturn(0);
938   } else if (isdraw) {
939     PetscDraw  draw;
940     PetscTruth isnull;
941     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
942     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
943   }
944 
945   if (size == 1) {
946     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
947     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
948   } else {
949     /* assemble the entire matrix onto first processor. */
950     Mat         A;
951     Mat_SeqAIJ *Aloc;
952     PetscInt         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
953     PetscScalar *a;
954 
955     if (!rank) {
956       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
957     } else {
958       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
959     }
960     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
961     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
962     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
963     PetscLogObjectParent(mat,A);
964 
965     /* copy over the A part */
966     Aloc = (Mat_SeqAIJ*)aij->A->data;
967     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
968     row = aij->rstart;
969     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
973     }
974     aj = Aloc->j;
975     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
976 
977     /* copy over the B part */
978     Aloc = (Mat_SeqAIJ*)aij->B->data;
979     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
980     row  = aij->rstart;
981     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
982     ct   = cols;
983     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
984     for (i=0; i<m; i++) {
985       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
986       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
987     }
988     ierr = PetscFree(ct);CHKERRQ(ierr);
989     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
990     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
991     /*
992        Everyone has to call to draw the matrix since the graphics waits are
993        synchronized across all processors that share the PetscDraw object
994     */
995     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
996     if (!rank) {
997       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
998       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
999     }
1000     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1001     ierr = MatDestroy(A);CHKERRQ(ierr);
1002   }
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatView_MPIAIJ"
1008 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1009 {
1010   PetscErrorCode ierr;
1011   PetscTruth     iascii,isdraw,issocket,isbinary;
1012 
1013   PetscFunctionBegin;
1014   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1015   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1016   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1017   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1018   if (iascii || isdraw || isbinary || issocket) {
1019     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1020   } else {
1021     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1022   }
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 
1027 
1028 #undef __FUNCT__
1029 #define __FUNCT__ "MatRelax_MPIAIJ"
1030 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1031 {
1032   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1033   PetscErrorCode ierr;
1034   Vec            bb1;
1035   PetscScalar    mone=-1.0;
1036 
1037   PetscFunctionBegin;
1038   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1039 
1040   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
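  /* each outer iteration below scatters the current xx to obtain the off-processor
     values, forms the locally updated right hand side bb1 = bb - B*x (implemented
     by scaling lvec by -1 and then using the multadd of B), and runs the requested
     sweep on the diagonal block only */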
1041 
1042   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1043     if (flag & SOR_ZERO_INITIAL_GUESS) {
1044       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1045       its--;
1046     }
1047 
1048     while (its--) {
1049       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1050       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1051 
1052       /* update rhs: bb1 = bb - B*x */
1053       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1054       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1055 
1056       /* local sweep */
1057       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1058       CHKERRQ(ierr);
1059     }
1060   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1061     if (flag & SOR_ZERO_INITIAL_GUESS) {
1062       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1063       its--;
1064     }
1065     while (its--) {
1066       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1067       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1068 
1069       /* update rhs: bb1 = bb - B*x */
1070       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1071       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1072 
1073       /* local sweep */
1074       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1075       CHKERRQ(ierr);
1076     }
1077   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1078     if (flag & SOR_ZERO_INITIAL_GUESS) {
1079       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1080       its--;
1081     }
1082     while (its--) {
1083       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1084       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1085 
1086       /* update rhs: bb1 = bb - B*x */
1087       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1088       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1089 
1090       /* local sweep */
1091       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1092       CHKERRQ(ierr);
1093     }
1094   } else {
1095     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1096   }
1097 
1098   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1104 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1105 {
1106   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1107   Mat            A = mat->A,B = mat->B;
1108   PetscErrorCode ierr;
1109   PetscReal      isend[5],irecv[5];
1110 
1111   PetscFunctionBegin;
1112   info->block_size     = 1.0;
1113   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1114   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1115   isend[3] = info->memory;  isend[4] = info->mallocs;
1116   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1117   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1118   isend[3] += info->memory;  isend[4] += info->mallocs;
1119   if (flag == MAT_LOCAL) {
1120     info->nz_used      = isend[0];
1121     info->nz_allocated = isend[1];
1122     info->nz_unneeded  = isend[2];
1123     info->memory       = isend[3];
1124     info->mallocs      = isend[4];
1125   } else if (flag == MAT_GLOBAL_MAX) {
1126     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1127     info->nz_used      = irecv[0];
1128     info->nz_allocated = irecv[1];
1129     info->nz_unneeded  = irecv[2];
1130     info->memory       = irecv[3];
1131     info->mallocs      = irecv[4];
1132   } else if (flag == MAT_GLOBAL_SUM) {
1133     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1134     info->nz_used      = irecv[0];
1135     info->nz_allocated = irecv[1];
1136     info->nz_unneeded  = irecv[2];
1137     info->memory       = irecv[3];
1138     info->mallocs      = irecv[4];
1139   }
1140   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1141   info->fill_ratio_needed = 0;
1142   info->factor_mallocs    = 0;
1143   info->rows_global       = (double)matin->M;
1144   info->columns_global    = (double)matin->N;
1145   info->rows_local        = (double)matin->m;
1146   info->columns_local     = (double)matin->N;
1147 
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 #undef __FUNCT__
1152 #define __FUNCT__ "MatSetOption_MPIAIJ"
1153 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1154 {
1155   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1156   PetscErrorCode ierr;
1157 
1158   PetscFunctionBegin;
1159   switch (op) {
1160   case MAT_NO_NEW_NONZERO_LOCATIONS:
1161   case MAT_YES_NEW_NONZERO_LOCATIONS:
1162   case MAT_COLUMNS_UNSORTED:
1163   case MAT_COLUMNS_SORTED:
1164   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1165   case MAT_KEEP_ZEROED_ROWS:
1166   case MAT_NEW_NONZERO_LOCATION_ERR:
1167   case MAT_USE_INODES:
1168   case MAT_DO_NOT_USE_INODES:
1169   case MAT_IGNORE_ZERO_ENTRIES:
1170     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1171     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1172     break;
1173   case MAT_ROW_ORIENTED:
1174     a->roworiented = PETSC_TRUE;
1175     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1176     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1177     break;
1178   case MAT_ROWS_SORTED:
1179   case MAT_ROWS_UNSORTED:
1180   case MAT_YES_NEW_DIAGONALS:
1181     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1182     break;
1183   case MAT_COLUMN_ORIENTED:
1184     a->roworiented = PETSC_FALSE;
1185     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1186     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1187     break;
1188   case MAT_IGNORE_OFF_PROC_ENTRIES:
1189     a->donotstash = PETSC_TRUE;
1190     break;
1191   case MAT_NO_NEW_DIAGONALS:
1192     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1193   case MAT_SYMMETRIC:
1194   case MAT_STRUCTURALLY_SYMMETRIC:
1195   case MAT_HERMITIAN:
1196   case MAT_SYMMETRY_ETERNAL:
1197     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1198     break;
1199   case MAT_NOT_SYMMETRIC:
1200   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1201   case MAT_NOT_HERMITIAN:
1202   case MAT_NOT_SYMMETRY_ETERNAL:
1203     break;
1204   default:
1205     SETERRQ(PETSC_ERR_SUP,"unknown option");
1206   }
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatGetRow_MPIAIJ"
1212 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1213 {
1214   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1215   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1216   PetscErrorCode ierr;
1217   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1218   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1219   PetscInt       *cmap,*idx_p;
1220 
1221   PetscFunctionBegin;
1222   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1223   mat->getrowactive = PETSC_TRUE;
1224 
1225   if (!mat->rowvalues && (idx || v)) {
1226     /*
1227         allocate enough space to hold information from the longest row.
1228     */
1229     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1230     PetscInt     max = 1,tmp;
1231     for (i=0; i<matin->m; i++) {
1232       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1233       if (max < tmp) { max = tmp; }
1234     }
1235     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1236     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1237   }
1238 
1239   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1240   lrow = row - rstart;
1241 
1242   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1243   if (!v)   {pvA = 0; pvB = 0;}
1244   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1245   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1246   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1247   nztot = nzA + nzB;
1248 
1249   cmap  = mat->garray;
1250   if (v  || idx) {
1251     if (nztot) {
1252       /* Sort by increasing column numbers, assuming A and B already sorted */
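      /* imark counts the off-diagonal (B) entries whose global column number is
         below cstart; those come first, then all diagonal-block (A) entries, then
         the remaining B entries, so the merged row is in increasing global column
         order */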
1253       PetscInt imark = -1;
1254       if (v) {
1255         *v = v_p = mat->rowvalues;
1256         for (i=0; i<nzB; i++) {
1257           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1258           else break;
1259         }
1260         imark = i;
1261         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1262         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1263       }
1264       if (idx) {
1265         *idx = idx_p = mat->rowindices;
1266         if (imark > -1) {
1267           for (i=0; i<imark; i++) {
1268             idx_p[i] = cmap[cworkB[i]];
1269           }
1270         } else {
1271           for (i=0; i<nzB; i++) {
1272             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1273             else break;
1274           }
1275           imark = i;
1276         }
1277         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1278         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1279       }
1280     } else {
1281       if (idx) *idx = 0;
1282       if (v)   *v   = 0;
1283     }
1284   }
1285   *nz = nztot;
1286   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1287   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1293 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1294 {
1295   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1296 
1297   PetscFunctionBegin;
1298   if (aij->getrowactive == PETSC_FALSE) {
1299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1300   }
1301   aij->getrowactive = PETSC_FALSE;
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatNorm_MPIAIJ"
1307 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1308 {
1309   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1310   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1311   PetscErrorCode ierr;
1312   PetscInt       i,j,cstart = aij->cstart;
1313   PetscReal      sum = 0.0;
1314   PetscScalar    *v;
1315 
1316   PetscFunctionBegin;
1317   if (aij->size == 1) {
1318     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1319   } else {
1320     if (type == NORM_FROBENIUS) {
1321       v = amat->a;
1322       for (i=0; i<amat->nz; i++) {
1323 #if defined(PETSC_USE_COMPLEX)
1324         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1325 #else
1326         sum += (*v)*(*v); v++;
1327 #endif
1328       }
1329       v = bmat->a;
1330       for (i=0; i<bmat->nz; i++) {
1331 #if defined(PETSC_USE_COMPLEX)
1332         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1333 #else
1334         sum += (*v)*(*v); v++;
1335 #endif
1336       }
1337       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1338       *norm = sqrt(*norm);
1339     } else if (type == NORM_1) { /* max column norm */
1340       PetscReal *tmp,*tmp2;
1341       PetscInt    *jj,*garray = aij->garray;
1342       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1343       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1344       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1345       *norm = 0.0;
1346       v = amat->a; jj = amat->j;
1347       for (j=0; j<amat->nz; j++) {
1348         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1349       }
1350       v = bmat->a; jj = bmat->j;
1351       for (j=0; j<bmat->nz; j++) {
1352         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1353       }
1354       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1355       for (j=0; j<mat->N; j++) {
1356         if (tmp2[j] > *norm) *norm = tmp2[j];
1357       }
1358       ierr = PetscFree(tmp);CHKERRQ(ierr);
1359       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1360     } else if (type == NORM_INFINITY) { /* max row norm */
1361       PetscReal ntemp = 0.0;
1362       for (j=0; j<aij->A->m; j++) {
1363         v = amat->a + amat->i[j];
1364         sum = 0.0;
1365         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1366           sum += PetscAbsScalar(*v); v++;
1367         }
1368         v = bmat->a + bmat->i[j];
1369         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1370           sum += PetscAbsScalar(*v); v++;
1371         }
1372         if (sum > ntemp) ntemp = sum;
1373       }
1374       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1375     } else {
1376       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1377     }
1378   }
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 #undef __FUNCT__
1383 #define __FUNCT__ "MatTranspose_MPIAIJ"
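     /*
        Forms the transpose by inserting each local row of A as a column of the new matrix:
        the diagonal block's column indices are temporarily shifted to global numbering (and
        shifted back afterwards), while the off-diagonal block's indices are mapped through
        garray.  Note that the new matrix is created without preallocation information.
     */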
1384 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1385 {
1386   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1387   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1388   PetscErrorCode ierr;
1389   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1390   Mat            B;
1391   PetscScalar    *array;
1392 
1393   PetscFunctionBegin;
1394   if (!matout && M != N) {
1395     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1396   }
1397 
1398   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1399   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1400   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1401 
1402   /* copy over the A part */
1403   Aloc = (Mat_SeqAIJ*)a->A->data;
1404   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1405   row = a->rstart;
1406   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1407   for (i=0; i<m; i++) {
1408     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1409     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1410   }
1411   aj = Aloc->j;
1412   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1413 
1414   /* copy over the B part */
1415   Aloc = (Mat_SeqAIJ*)a->B->data;
1416   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1417   row  = a->rstart;
1418   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1419   ct   = cols;
1420   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1421   for (i=0; i<m; i++) {
1422     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1423     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1424   }
1425   ierr = PetscFree(ct);CHKERRQ(ierr);
1426   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1427   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1428   if (matout) {
1429     *matout = B;
1430   } else {
1431     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1432   }
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
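     /*
        Scales the matrix as diag(ll)*mat*diag(rr).  The scatter of rr into the work vector
        lvec (needed to right scale the off-diagonal block) is started first so that the
        communication overlaps with the scaling of the diagonal block.
     */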
1438 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1439 {
1440   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1441   Mat            a = aij->A,b = aij->B;
1442   PetscErrorCode ierr;
1443   PetscInt       s1,s2,s3;
1444 
1445   PetscFunctionBegin;
1446   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1447   if (rr) {
1448     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1449     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1450     /* Overlap communication with computation. */
1451     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1452   }
1453   if (ll) {
1454     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1455     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1456     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1457   }
1458   /* scale  the diagonal block */
1459   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1460 
1461   if (rr) {
1462     /* Do a scatter end and then right scale the off-diagonal block */
1463     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1464     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1465   }
1466 
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1473 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1474 {
1475   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1476   PetscErrorCode ierr;
1477 
1478   PetscFunctionBegin;
1479   if (!a->rank) {
1480     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1487 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,PetscInt *bs)
1488 {
1489   PetscFunctionBegin;
1490   *bs = 1;
1491   PetscFunctionReturn(0);
1492 }
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1495 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1496 {
1497   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1498   PetscErrorCode ierr;
1499 
1500   PetscFunctionBegin;
1501   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatEqual_MPIAIJ"
1507 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1508 {
1509   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1510   Mat            a,b,c,d;
1511   PetscTruth     flg;
1512   PetscErrorCode ierr;
1513 
1514   PetscFunctionBegin;
1515   a = matA->A; b = matA->B;
1516   c = matB->A; d = matB->B;
1517 
1518   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1519   if (flg == PETSC_TRUE) {
1520     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1521   }
1522   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatCopy_MPIAIJ"
1528 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1529 {
1530   PetscErrorCode ierr;
1531   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1532   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1533 
1534   PetscFunctionBegin;
1535   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1536   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1537     /* because of the column compression in the off-processor part of the matrix a->B,
1538        the number of columns in a->B and b->B may be different, hence we cannot call
1539        MatCopy() directly on the two parts. If need be, we could provide a more
1540        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1541        and then copying the submatrices */
1542     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1543   } else {
1544     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1545     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1546   }
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1552 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1553 {
1554   PetscErrorCode ierr;
1555 
1556   PetscFunctionBegin;
1557   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #include "petscblaslapack.h"
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatAXPY_MPIAIJ"
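     /*
        Computes Y = a*X + Y.  With SAME_NONZERO_PATTERN a BLAS axpy is applied directly to
        the value arrays of the A and B blocks.  With SUBSET_NONZERO_PATTERN the A blocks use
        MatAXPY_SeqAIJ(), while for the B blocks an x-to-y index map is built (and cached)
        because the off-diagonal column compression may differ between X and Y.  All other
        cases fall back to the general MatAXPY_Basic().
     */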
1564 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1565 {
1566   PetscErrorCode ierr;
1567   PetscInt       i;
1568   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1569   PetscBLASInt   bnz,one=1;
1570   Mat_SeqAIJ     *x,*y;
1571 
1572   PetscFunctionBegin;
1573   if (str == SAME_NONZERO_PATTERN) {
1574     x = (Mat_SeqAIJ *)xx->A->data;
1575     y = (Mat_SeqAIJ *)yy->A->data;
1576     bnz = (PetscBLASInt)x->nz;
1577     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1578     x = (Mat_SeqAIJ *)xx->B->data;
1579     y = (Mat_SeqAIJ *)yy->B->data;
1580     bnz = (PetscBLASInt)x->nz;
1581     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1582   } else if (str == SUBSET_NONZERO_PATTERN) {
1583     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1584 
1585     x = (Mat_SeqAIJ *)xx->B->data;
1586     y = (Mat_SeqAIJ *)yy->B->data;
1587     if (y->xtoy && y->XtoY != xx->B) {
1588       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1589       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1590     }
1591     if (!y->xtoy) { /* get xtoy */
1592       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1593       y->XtoY = xx->B;
1594     }
1595     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1596   } else {
1597     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 /* -------------------------------------------------------------------*/
1603 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1604        MatGetRow_MPIAIJ,
1605        MatRestoreRow_MPIAIJ,
1606        MatMult_MPIAIJ,
1607 /* 4*/ MatMultAdd_MPIAIJ,
1608        MatMultTranspose_MPIAIJ,
1609        MatMultTransposeAdd_MPIAIJ,
1610        0,
1611        0,
1612        0,
1613 /*10*/ 0,
1614        0,
1615        0,
1616        MatRelax_MPIAIJ,
1617        MatTranspose_MPIAIJ,
1618 /*15*/ MatGetInfo_MPIAIJ,
1619        MatEqual_MPIAIJ,
1620        MatGetDiagonal_MPIAIJ,
1621        MatDiagonalScale_MPIAIJ,
1622        MatNorm_MPIAIJ,
1623 /*20*/ MatAssemblyBegin_MPIAIJ,
1624        MatAssemblyEnd_MPIAIJ,
1625        0,
1626        MatSetOption_MPIAIJ,
1627        MatZeroEntries_MPIAIJ,
1628 /*25*/ MatZeroRows_MPIAIJ,
1629 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_64BIT_INT)
1630        MatLUFactorSymbolic_MPIAIJ_TFS,
1631 #else
1632        0,
1633 #endif
1634        0,
1635        0,
1636        0,
1637 /*30*/ MatSetUpPreallocation_MPIAIJ,
1638        0,
1639        0,
1640        0,
1641        0,
1642 /*35*/ MatDuplicate_MPIAIJ,
1643        0,
1644        0,
1645        0,
1646        0,
1647 /*40*/ MatAXPY_MPIAIJ,
1648        MatGetSubMatrices_MPIAIJ,
1649        MatIncreaseOverlap_MPIAIJ,
1650        MatGetValues_MPIAIJ,
1651        MatCopy_MPIAIJ,
1652 /*45*/ MatPrintHelp_MPIAIJ,
1653        MatScale_MPIAIJ,
1654        0,
1655        0,
1656        0,
1657 /*50*/ MatGetBlockSize_MPIAIJ,
1658        0,
1659        0,
1660        0,
1661        0,
1662 /*55*/ MatFDColoringCreate_MPIAIJ,
1663        0,
1664        MatSetUnfactored_MPIAIJ,
1665        0,
1666        0,
1667 /*60*/ MatGetSubMatrix_MPIAIJ,
1668        MatDestroy_MPIAIJ,
1669        MatView_MPIAIJ,
1670        MatGetPetscMaps_Petsc,
1671        0,
1672 /*65*/ 0,
1673        0,
1674        0,
1675        0,
1676        0,
1677 /*70*/ 0,
1678        0,
1679        MatSetColoring_MPIAIJ,
1680        MatSetValuesAdic_MPIAIJ,
1681        MatSetValuesAdifor_MPIAIJ,
1682 /*75*/ 0,
1683        0,
1684        0,
1685        0,
1686        0,
1687 /*80*/ 0,
1688        0,
1689        0,
1690        0,
1691 /*84*/ MatLoad_MPIAIJ,
1692        0,
1693        0,
1694        0,
1695        0,
1696        0,
1697 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1698        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1699        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1700        MatPtAP_MPIAIJ_MPIAIJ,
1701        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1702 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1703        0,
1704        0,
1705        0};
1706 
1707 /* ----------------------------------------------------------------------------------------*/
1708 
1709 EXTERN_C_BEGIN
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1712 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1713 {
1714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1715   PetscErrorCode ierr;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1719   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1720   PetscFunctionReturn(0);
1721 }
1722 EXTERN_C_END
1723 
1724 EXTERN_C_BEGIN
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1727 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1728 {
1729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1730   PetscErrorCode ierr;
1731 
1732   PetscFunctionBegin;
1733   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1734   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1735   PetscFunctionReturn(0);
1736 }
1737 EXTERN_C_END
1738 
1739 #include "petscpc.h"
1740 EXTERN_C_BEGIN
1741 #undef __FUNCT__
1742 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1743 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1744 {
1745   Mat_MPIAIJ     *b;
1746   PetscErrorCode ierr;
1747   PetscInt       i;
1748 
1749   PetscFunctionBegin;
1750   B->preallocated = PETSC_TRUE;
1751   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1752   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1753   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1754   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1755   if (d_nnz) {
1756     for (i=0; i<B->m; i++) {
1757       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1758     }
1759   }
1760   if (o_nnz) {
1761     for (i=0; i<B->m; i++) {
1762       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1763     }
1764   }
1765   b = (Mat_MPIAIJ*)B->data;
1766   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1767   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1768 
1769   PetscFunctionReturn(0);
1770 }
1771 EXTERN_C_END
1772 
1773 /*MC
1774    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1775 
1776    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1777    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1778    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
1779    for communicators controlling multiple processes.  It is recommended that you call both of
1780    the above preallocation routines for simplicity.
1781 
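        A typical construction sequence is thus (a sketch; the preallocation values used
     here are only illustrative):
     .vb
        MatCreate(comm,m,n,M,N,&A);
        MatSetType(A,MATAIJ);
        MatSeqAIJSetPreallocation(A,5,PETSC_NULL);
        MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
     .ve
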
1782    Options Database Keys:
1783 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1784 
1785   Level: beginner
1786 
1787 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1788 M*/
1789 
1790 EXTERN_C_BEGIN
1791 #undef __FUNCT__
1792 #define __FUNCT__ "MatCreate_AIJ"
1793 PetscErrorCode MatCreate_AIJ(Mat A)
1794 {
1795   PetscErrorCode ierr;
1796   PetscMPIInt    size;
1797 
1798   PetscFunctionBegin;
1799   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1800   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1801   if (size == 1) {
1802     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1803   } else {
1804     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1805   }
1806   PetscFunctionReturn(0);
1807 }
1808 EXTERN_C_END
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1812 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1813 {
1814   Mat            mat;
1815   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1816   PetscErrorCode ierr;
1817 
1818   PetscFunctionBegin;
1819   *newmat       = 0;
1820   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1821   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1822   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1823   a    = (Mat_MPIAIJ*)mat->data;
1824 
1825   mat->factor       = matin->factor;
1826   mat->assembled    = PETSC_TRUE;
1827   mat->insertmode   = NOT_SET_VALUES;
1828   mat->preallocated = PETSC_TRUE;
1829 
1830   a->rstart       = oldmat->rstart;
1831   a->rend         = oldmat->rend;
1832   a->cstart       = oldmat->cstart;
1833   a->cend         = oldmat->cend;
1834   a->size         = oldmat->size;
1835   a->rank         = oldmat->rank;
1836   a->donotstash   = oldmat->donotstash;
1837   a->roworiented  = oldmat->roworiented;
1838   a->rowindices   = 0;
1839   a->rowvalues    = 0;
1840   a->getrowactive = PETSC_FALSE;
1841 
1842   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1843   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1844   if (oldmat->colmap) {
1845 #if defined (PETSC_USE_CTABLE)
1846     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1847 #else
1848     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1849     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1850     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1851 #endif
1852   } else a->colmap = 0;
1853   if (oldmat->garray) {
1854     PetscInt len;
1855     len  = oldmat->B->n;
1856     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1857     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1858     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1859   } else a->garray = 0;
1860 
1861   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1862   PetscLogObjectParent(mat,a->lvec);
1863   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1864   PetscLogObjectParent(mat,a->Mvctx);
1865   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1866   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1867   PetscLogObjectParent(mat,a->A);
1868   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1869   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1870   PetscLogObjectParent(mat,a->B);
1871   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1872   *newmat = mat;
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 #include "petscsys.h"
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatLoad_MPIAIJ"
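     /*
        Loads a matrix from a PETSc binary viewer: process 0 reads the four integer header
        (checking the cookie and that the matrix is not stored in a special format),
        broadcasts the global sizes, scatters the row lengths, and then reads the column
        indices and numerical values for each process in turn, shipping them with MPI_Send().
        Every process counts its diagonal/off-diagonal entries from the received column
        indices, preallocates accordingly, and inserts its rows with MatSetValues().
     */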
1880 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1881 {
1882   Mat            A;
1883   PetscScalar    *vals,*svals;
1884   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1885   MPI_Status     status;
1886   PetscErrorCode ierr;
1887   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*rowners,maxnz,mm;
1888   PetscInt       i,nz,j,rstart,rend;
1889   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1890   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1891   PetscInt       cend,cstart,n;
1892   int            fd;
1893 
1894   PetscFunctionBegin;
1895   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1896   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1897   if (!rank) {
1898     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1899     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1900     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1901     if (header[3] < 0) {
1902       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1903     }
1904   }
1905 
1906   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1907   M = header[1]; N = header[2];
1908   /* determine ownership of all rows */
1909   m = M/size + ((M % size) > rank);
1910   mm = (PetscMPIInt) m;
1911   ierr = PetscMalloc((size+2)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1912   ierr = MPI_Allgather(&mm,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1913   rowners[0] = 0;
1914   for (i=2; i<=size; i++) {
1915     rowners[i] += rowners[i-1];
1916   }
1917   rstart = rowners[rank];
1918   rend   = rowners[rank+1];
1919 
1920   /* distribute row lengths to all processors */
1921   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&ourlens);CHKERRQ(ierr);
1922   offlens = ourlens + (rend-rstart);
1923   if (!rank) {
1924     ierr = PetscMalloc(M*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1925     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1926     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
1927     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1928     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1929     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1930   } else {
1931     ierr = MPI_Scatterv(0,0,0,MPIU_INT,ourlens,rend-rstart,MPIU_INT,0,comm);CHKERRQ(ierr);
1932   }
1933 
1934   if (!rank) {
1935     /* calculate the number of nonzeros on each processor */
1936     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1937     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1938     for (i=0; i<size; i++) {
1939       for (j=rowners[i]; j< rowners[i+1]; j++) {
1940         procsnz[i] += rowlengths[j];
1941       }
1942     }
1943     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1944 
1945     /* determine max buffer needed and allocate it */
1946     maxnz = 0;
1947     for (i=0; i<size; i++) {
1948       maxnz = PetscMax(maxnz,procsnz[i]);
1949     }
1950     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1951 
1952     /* read in my part of the matrix column indices  */
1953     nz   = procsnz[0];
1954     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1955     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1956 
1957     /* read in everyone else's part and ship it off */
1958     for (i=1; i<size; i++) {
1959       nz   = procsnz[i];
1960       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1961       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1962     }
1963     ierr = PetscFree(cols);CHKERRQ(ierr);
1964   } else {
1965     /* determine buffer space needed for message */
1966     nz = 0;
1967     for (i=0; i<m; i++) {
1968       nz += ourlens[i];
1969     }
1970     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1971 
1972     /* receive message of column indices*/
1973     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1974     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1975     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1976   }
1977 
1978   /* determine column ownership if matrix is not square */
1979   if (N != M) {
1980     n      = N/size + ((N % size) > rank);
1981     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1982     cstart = cend - n;
1983   } else {
1984     cstart = rstart;
1985     cend   = rend;
1986     n      = cend - cstart;
1987   }
1988 
1989   /* loop over local rows, determining number of off diagonal entries */
1990   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1991   jj = 0;
1992   for (i=0; i<m; i++) {
1993     for (j=0; j<ourlens[i]; j++) {
1994       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1995       jj++;
1996     }
1997   }
1998 
1999   /* create our matrix */
2000   for (i=0; i<m; i++) {
2001     ourlens[i] -= offlens[i];
2002   }
2003   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2004   ierr = MatSetType(A,type);CHKERRQ(ierr);
2005   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2006 
2007   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2008   for (i=0; i<m; i++) {
2009     ourlens[i] += offlens[i];
2010   }
2011 
2012   if (!rank) {
2013     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2014 
2015     /* read in my part of the matrix numerical values  */
2016     nz   = procsnz[0];
2017     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2018 
2019     /* insert into matrix */
2020     jj      = rstart;
2021     smycols = mycols;
2022     svals   = vals;
2023     for (i=0; i<m; i++) {
2024       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2025       smycols += ourlens[i];
2026       svals   += ourlens[i];
2027       jj++;
2028     }
2029 
2030     /* read in other processors and ship out */
2031     for (i=1; i<size; i++) {
2032       nz   = procsnz[i];
2033       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2034       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2035     }
2036     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2037   } else {
2038     /* receive numeric values */
2039     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2040 
2041     /* receive message of values*/
2042     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2043     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2044     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2045 
2046     /* insert into matrix */
2047     jj      = rstart;
2048     smycols = mycols;
2049     svals   = vals;
2050     for (i=0; i<m; i++) {
2051       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2052       smycols += ourlens[i];
2053       svals   += ourlens[i];
2054       jj++;
2055     }
2056   }
2057   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2058   ierr = PetscFree(vals);CHKERRQ(ierr);
2059   ierr = PetscFree(mycols);CHKERRQ(ierr);
2060   ierr = PetscFree(rowners);CHKERRQ(ierr);
2061 
2062   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2063   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2064   *newmat = A;
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2070 /*
2071     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
2072   on each process, and then the final result formed by concatenating the local matrices.
2073   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2074 */
2075 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2076 {
2077   PetscErrorCode ierr;
2078   PetscMPIInt    rank,size;
2079   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2080   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2081   Mat            *local,M,Mreuse;
2082   PetscScalar    *vwork,*aa;
2083   MPI_Comm       comm = mat->comm;
2084   Mat_SeqAIJ     *aij;
2085 
2086 
2087   PetscFunctionBegin;
2088   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2089   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2090 
2091   if (call ==  MAT_REUSE_MATRIX) {
2092     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2093     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2094     local = &Mreuse;
2095     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2096   } else {
2097     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2098     Mreuse = *local;
2099     ierr   = PetscFree(local);CHKERRQ(ierr);
2100   }
2101 
2102   /*
2103       m - number of local rows
2104       n - number of columns (same on all processors)
2105       rstart - first row in new global matrix generated
2106   */
2107   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2108   if (call == MAT_INITIAL_MATRIX) {
2109     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2110     ii  = aij->i;
2111     jj  = aij->j;
2112 
2113     /*
2114         Determine the number of non-zeros in the diagonal and off-diagonal
2115         portions of the matrix in order to do correct preallocation
2116     */
2117 
2118     /* first get start and end of "diagonal" columns */
2119     if (csize == PETSC_DECIDE) {
2120       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2121       if (mglobal == n) { /* square matrix */
2122         nlocal = m;
2123       } else {
2124         nlocal = n/size + ((n % size) > rank);
2125       }
2126     } else {
2127       nlocal = csize;
2128     }
2129     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2130     rstart = rend - nlocal;
2131     if (rank == size - 1 && rend != n) {
2132       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2133     }
2134 
2135     /* next, compute all the lengths */
2136     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2137     olens = dlens + m;
2138     for (i=0; i<m; i++) {
2139       jend = ii[i+1] - ii[i];
2140       olen = 0;
2141       dlen = 0;
2142       for (j=0; j<jend; j++) {
2143         if (*jj < rstart || *jj >= rend) olen++;
2144         else dlen++;
2145         jj++;
2146       }
2147       olens[i] = olen;
2148       dlens[i] = dlen;
2149     }
2150     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2151     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2152     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2153     ierr = PetscFree(dlens);CHKERRQ(ierr);
2154   } else {
2155     PetscInt ml,nl;
2156 
2157     M = *newmat;
2158     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2159     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2160     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2161     /*
2162          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2163        rather than the slower MatSetValues().
2164     */
2165     M->was_assembled = PETSC_TRUE;
2166     M->assembled     = PETSC_FALSE;
2167   }
2168   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2169   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2170   ii  = aij->i;
2171   jj  = aij->j;
2172   aa  = aij->a;
2173   for (i=0; i<m; i++) {
2174     row   = rstart + i;
2175     nz    = ii[i+1] - ii[i];
2176     cwork = jj;     jj += nz;
2177     vwork = aa;     aa += nz;
2178     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2179   }
2180 
2181   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2182   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2183   *newmat = M;
2184 
2185   /* save submatrix used in processor for next request */
2186   if (call ==  MAT_INITIAL_MATRIX) {
2187     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2188     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2189   }
2190 
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
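     /*
        Preallocates and fills the matrix from the local CSR arrays (I,J,v): for each local
        row the entries with columns in [cstart,cend) are counted as diagonal and the rest
        as off-diagonal (the column indices must be sorted within each row), the two blocks
        are preallocated with those counts, and the rows are inserted with MatSetValues_MPIAIJ().
        If v is not provided, explicit zeros are inserted.
     */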
2196 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2197 {
2198   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2199   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2200   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2201   const PetscInt *JJ;
2202   PetscScalar    *values;
2203   PetscErrorCode ierr;
2204 
2205   PetscFunctionBegin;
2206 #if defined(PETSC_OPT_g)
2207   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2208 #endif
2209   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2210   o_nnz = d_nnz + m;
2211 
2212   for (i=0; i<m; i++) {
2213     nnz     = I[i+1]- I[i];
2214     JJ      = J + I[i];
2215     nnz_max = PetscMax(nnz_max,nnz);
2216 #if defined(PETSC_OPT_g)
2217     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2218 #endif
2219     for (j=0; j<nnz; j++) {
2220       if (*JJ >= cstart) break;
2221       JJ++;
2222     }
2223     d = 0;
2224     for (; j<nnz; j++) {
2225       if (*JJ++ >= cend) break;
2226       d++;
2227     }
2228     d_nnz[i] = d;
2229     o_nnz[i] = nnz - d;
2230   }
2231   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2232   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2233 
2234   if (v) values = (PetscScalar*)v;
2235   else {
2236     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2237     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2238   }
2239 
2240   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2241   for (i=0; i<m; i++) {
2242     ii   = i + rstart;
2243     nnz  = I[i+1]- I[i];
2244     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2245   }
2246   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2247   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2248   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2249 
2250   if (!v) {
2251     ierr = PetscFree(values);CHKERRQ(ierr);
2252   }
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2258 /*@C
2259    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2260    (the default parallel PETSc format).
2261 
2262    Collective on MPI_Comm
2263 
2264    Input Parameters:
2265 +  A - the matrix
2266 .  i - the indices into j for the start of each local row (starts with zero)
2267 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2268 -  v - optional values in the matrix
2269 
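        Example (a sketch, assuming A has already been created with type MPIAIJ and owns two
     local rows; the numbers are purely illustrative): row 0 has entries in global columns 0
     and 3, row 1 in global columns 1, 2 and 4.
     .vb
        PetscInt    i[] = {0,2,5};
        PetscInt    j[] = {0,3,1,2,4};
        PetscScalar v[] = {1,2,3,4,5};
        ierr = MatMPIAIJSetPreallocationCSR(A,i,j,v);CHKERRQ(ierr);
     .ve
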
2270    Level: developer
2271 
2272 .keywords: matrix, aij, compressed row, sparse, parallel
2273 
2274 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2275 @*/
2276 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2277 {
2278   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2279 
2280   PetscFunctionBegin;
2281   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2282   if (f) {
2283     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2284   }
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 #undef __FUNCT__
2289 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2290 /*@C
2291    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2292    (the default parallel PETSc format).  For good matrix assembly performance
2293    the user should preallocate the matrix storage by setting the parameters
2294    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2295    performance can be increased by more than a factor of 50.
2296 
2297    Collective on MPI_Comm
2298 
2299    Input Parameters:
2300 +  A - the matrix
2301 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2302            (same value is used for all local rows)
2303 .  d_nnz - array containing the number of nonzeros in the various rows of the
2304            DIAGONAL portion of the local submatrix (possibly different for each row)
2305            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2306            The size of this array is equal to the number of local rows, i.e 'm'.
2307            You must leave room for the diagonal entry even if it is zero.
2308 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2309            submatrix (same value is used for all local rows).
2310 -  o_nnz - array containing the number of nonzeros in the various rows of the
2311            OFF-DIAGONAL portion of the local submatrix (possibly different for
2312            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2313            structure. The size of this array is equal to the number
2314            of local rows, i.e 'm'.
2315 
2316    The AIJ format (also called the Yale sparse matrix format or
2317    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2318    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2319 
2320    The parallel matrix is partitioned such that the first m0 rows belong to
2321    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2322    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2323 
2324    The DIAGONAL portion of the local submatrix of a processor can be defined
2325    as the submatrix which is obtained by extracting the part corresponding
2326    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2327    first row that belongs to the processor, and r2 is the last row belonging
2328    to this processor. This is a square mxm matrix. The remaining portion
2329    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2330 
2331    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2332 
2333    Example usage:
2334 
2335    Consider the following 8x8 matrix with 34 non-zero values, that is
2336    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2337    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2338    as follows:
2339 
2340 .vb
2341             1  2  0  |  0  3  0  |  0  4
2342     Proc0   0  5  6  |  7  0  0  |  8  0
2343             9  0 10  | 11  0  0  | 12  0
2344     -------------------------------------
2345            13  0 14  | 15 16 17  |  0  0
2346     Proc1   0 18  0  | 19 20 21  |  0  0
2347             0  0  0  | 22 23  0  | 24  0
2348     -------------------------------------
2349     Proc2  25 26 27  |  0  0 28  | 29  0
2350            30  0  0  | 31 32 33  |  0 34
2351 .ve
2352 
2353    This can be represented as a collection of submatrices as:
2354 
2355 .vb
2356       A B C
2357       D E F
2358       G H I
2359 .ve
2360 
2361    Where the submatrices A,B,C are owned by proc0, D,E,F are
2362    owned by proc1, G,H,I are owned by proc2.
2363 
2364    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2365    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2366    The 'M','N' parameters are 8,8, and have the same values on all procs.
2367 
2368    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2369    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2370    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2371    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2372    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2373    matrix, and [DF] as another SeqAIJ matrix.
2374 
2375    When d_nz, o_nz parameters are specified, d_nz storage elements are
2376    allocated for every row of the local diagonal submatrix, and o_nz
2377    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2378    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2379    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2380    In this case, the values of d_nz,o_nz are:
2381 .vb
2382      proc0 : dnz = 2, o_nz = 2
2383      proc1 : dnz = 3, o_nz = 2
2384      proc2 : dnz = 1, o_nz = 4
2385 .ve
2386    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2387    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2388    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2389    34 values.
2390 
2391    When d_nnz, o_nnz parameters are specified, the storage is specified
2392    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2393    In the above case the values for d_nnz,o_nnz are:
2394 .vb
2395      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2396      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2397      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2398 .ve
2399    Here the space allocated is the sum of all the above values, i.e., 34, and
2400    hence the preallocation is exact.
2401 
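        For the example above, the calls on proc0 would therefore be (a sketch; A is assumed
     to already have type MPIAIJ and local size 3x3):
     .vb
        PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
        ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     .ve
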
2402    Level: intermediate
2403 
2404 .keywords: matrix, aij, compressed row, sparse, parallel
2405 
2406 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2407           MPIAIJ
2408 @*/
2409 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2410 {
2411   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2412 
2413   PetscFunctionBegin;
2414   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2415   if (f) {
2416     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2417   }
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatCreateMPIAIJ"
2423 /*@C
2424    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2425    (the default parallel PETSc format).  For good matrix assembly performance
2426    the user should preallocate the matrix storage by setting the parameters
2427    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2428    performance can be increased by more than a factor of 50.
2429 
2430    Collective on MPI_Comm
2431 
2432    Input Parameters:
2433 +  comm - MPI communicator
2434 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2435            This value should be the same as the local size used in creating the
2436            y vector for the matrix-vector product y = Ax.
2437 .  n - This value should be the same as the local size used in creating the
2438        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2439        calculated if N is given). For square matrices n is almost always m.
2440 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2441 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2442 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2443            (same value is used for all local rows)
2444 .  d_nnz - array containing the number of nonzeros in the various rows of the
2445            DIAGONAL portion of the local submatrix (possibly different for each row)
2446            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2447            The size of this array is equal to the number of local rows, i.e 'm'.
2448            You must leave room for the diagonal entry even if it is zero.
2449 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2450            submatrix (same value is used for all local rows).
2451 -  o_nnz - array containing the number of nonzeros in the various rows of the
2452            OFF-DIAGONAL portion of the local submatrix (possibly different for
2453            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2454            structure. The size of this array is equal to the number
2455            of local rows, i.e 'm'.
2456 
2457    Output Parameter:
2458 .  A - the matrix
2459 
2460    Notes:
2461    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2462    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2463    storage requirements for this matrix.
2464 
2465    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2466    processor then it must be used on all processors that share the object for
2467    that argument.
2468 
2469    The user MUST specify either the local or global matrix dimensions
2470    (possibly both).
2471 
2472    The parallel matrix is partitioned such that the first m0 rows belong to
2473    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2474    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2475 
2476    The DIAGONAL portion of the local submatrix of a processor can be defined
2477    as the submatrix which is obtained by extracting the part corresponding
2478    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2479    first row that belongs to the processor, and r2 is the last row belonging
2480    to this processor. This is a square mxm matrix. The remaining portion
2481    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2482 
2483    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2484 
2485    When calling this routine with a single process communicator, a matrix of
2486    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2487    type of communicator, use the construction mechanism:
2488      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2489 
2490    By default, this format uses inodes (identical nodes) when possible.
2491    We search for consecutive rows with the same nonzero structure, thereby
2492    reusing matrix information to achieve increased efficiency.
2493 
2494    Options Database Keys:
2495 +  -mat_aij_no_inode  - Do not use inodes
2496 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2497 -  -mat_aij_oneindex - Internally use indexing starting at 1
2498         rather than 0.  Note that when calling MatSetValues(),
2499         the user still MUST index entries starting at 0!
2500 
2501 
2502    Example usage:
2503 
2504    Consider the following 8x8 matrix with 34 non-zero values, that is
2505    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2506    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2507    as follows:
2508 
2509 .vb
2510             1  2  0  |  0  3  0  |  0  4
2511     Proc0   0  5  6  |  7  0  0  |  8  0
2512             9  0 10  | 11  0  0  | 12  0
2513     -------------------------------------
2514            13  0 14  | 15 16 17  |  0  0
2515     Proc1   0 18  0  | 19 20 21  |  0  0
2516             0  0  0  | 22 23  0  | 24  0
2517     -------------------------------------
2518     Proc2  25 26 27  |  0  0 28  | 29  0
2519            30  0  0  | 31 32 33  |  0 34
2520 .ve
2521 
2522    This can be represented as a collection of submatrices as:
2523 
2524 .vb
2525       A B C
2526       D E F
2527       G H I
2528 .ve
2529 
2530    Where the submatrices A,B,C are owned by proc0, D,E,F are
2531    owned by proc1, G,H,I are owned by proc2.
2532 
2533    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2534    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2535    The 'M','N' parameters are 8,8, and have the same values on all procs.
2536 
2537    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2538    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2539    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2540    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2541    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2542    matrix, and [DF] as another SeqAIJ matrix.
2543 
2544    When d_nz, o_nz parameters are specified, d_nz storage elements are
2545    allocated for every row of the local diagonal submatrix, and o_nz
2546    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2547    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2548    local row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2549    In this case, the values of d_nz,o_nz are:
2550 .vb
2551      proc0 : dnz = 2, o_nz = 2
2552      proc1 : dnz = 3, o_nz = 2
2553      proc2 : dnz = 1, o_nz = 4
2554 .ve
2555    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2556    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2557    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2558    34 values.
2559 
2560    When d_nnz, o_nnz parameters are specified, the storage is specified
2561    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2562    In the above case the values for d_nnz,o_nnz are:
2563 .vb
2564      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2565      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2566      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2567 .ve
2568    Here the space allocated is the sum of all the above values, i.e., 34, and
2569    hence the preallocation is exact.
2570 
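        For the example above, proc0 could therefore create its part of the matrix with
     (a sketch; the declaration of A and the error checking of ierr are omitted):
     .vb
        PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
        ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     .ve
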
2571    Level: intermediate
2572 
2573 .keywords: matrix, aij, compressed row, sparse, parallel
2574 
2575 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2576           MPIAIJ
2577 @*/
2578 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2579 {
2580   PetscErrorCode ierr;
2581   PetscMPIInt    size;
2582 
2583   PetscFunctionBegin;
2584   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2585   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2586   if (size > 1) {
2587     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2588     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2589   } else {
2590     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2591     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2592   }
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 #undef __FUNCT__
2597 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
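     /*
        Gives access to the two SeqAIJ blocks of an MPIAIJ matrix: Ad is the diagonal block,
        Ao the off-diagonal block, and colmap is the garray that maps the compressed local
        column numbers of Ao back to global column numbers.
     */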
2598 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2599 {
2600   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2601 
2602   PetscFunctionBegin;
2603   *Ad     = a->A;
2604   *Ao     = a->B;
2605   *colmap = a->garray;
2606   PetscFunctionReturn(0);
2607 }
2608 
2609 #undef __FUNCT__
2610 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2611 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2612 {
2613   PetscErrorCode ierr;
2614   PetscInt       i;
2615   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2616 
2617   PetscFunctionBegin;
2618   if (coloring->ctype == IS_COLORING_LOCAL) {
2619     ISColoringValue *allcolors,*colors;
2620     ISColoring      ocoloring;
2621 
2622     /* set coloring for diagonal portion */
2623     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2624 
2625     /* set coloring for off-diagonal portion */
2626     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2627     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2628     for (i=0; i<a->B->n; i++) {
2629       colors[i] = allcolors[a->garray[i]];
2630     }
2631     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2632     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2633     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2634     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2635   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2636     ISColoringValue *colors;
2637     PetscInt             *larray;
2638     ISColoring      ocoloring;
2639 
2640     /* set coloring for diagonal portion */
2641     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2642     for (i=0; i<a->A->n; i++) {
2643       larray[i] = i + a->cstart;
2644     }
2645     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2646     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2647     for (i=0; i<a->A->n; i++) {
2648       colors[i] = coloring->colors[larray[i]];
2649     }
2650     ierr = PetscFree(larray);CHKERRQ(ierr);
2651     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2652     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2653     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2654 
2655     /* set coloring for off-diagonal portion */
2656     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2657     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2658     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2659     for (i=0; i<a->B->n; i++) {
2660       colors[i] = coloring->colors[larray[i]];
2661     }
2662     ierr = PetscFree(larray);CHKERRQ(ierr);
2663     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2664     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2665     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2666   } else {
2667     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2668   }
2669 
2670   PetscFunctionReturn(0);
2671 }
2672 
2673 #undef __FUNCT__
2674 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2675 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2676 {
2677   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2678   PetscErrorCode ierr;
2679 
2680   PetscFunctionBegin;
2681   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2682   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2683   PetscFunctionReturn(0);
2684 }
2685 
2686 #undef __FUNCT__
2687 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2688 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2689 {
2690   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2691   PetscErrorCode ierr;
2692 
2693   PetscFunctionBegin;
2694   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2695   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2696   PetscFunctionReturn(0);
2697 }
2698 
2699 #undef __FUNCT__
2700 #define __FUNCT__ "MatMerge"
2701 /*@C
2702       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2703                  matrices from each processor
2704 
2705     Collective on MPI_Comm
2706 
2707    Input Parameters:
2708 +    comm - the communicator the parallel matrix will live on
2709 .    inmat - the input sequential matrix, one per process
2710 .    n - number of local columns (or PETSC_DECIDE)
2711 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2712 
2713    Output Parameter:
2714 .    outmat - the parallel matrix generated
2715 
2716     Level: advanced
2717 
2718    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2719 
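        Example usage (a sketch; Aseq is the assembled sequential matrix on each process and
     is destroyed by this routine):
     .vb
        Mat Aseq,Apar;
        ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
     .ve
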
2720 @*/
2721 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2722 {
2723   PetscErrorCode    ierr;
2724   PetscInt          m,N,i,rstart,nnz,I,*dnz,*onz;
2725   const PetscInt    *indx;
2726   const PetscScalar *values;
2727   PetscMap          columnmap,rowmap;
2728 
2729   PetscFunctionBegin;
2730   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2731   /*
2732   PetscMPIInt       rank;
2733   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2734   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2735   */
2736   if (scall == MAT_INITIAL_MATRIX){
2737     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2738     if (n == PETSC_DECIDE){
2739       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2740       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2741       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2742       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2743       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2744     }
2745 
2746     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2747     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2748     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2749     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2750     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2751 
2752     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2753     for (i=0;i<m;i++) {
2754       ierr = MatGetRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2755       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2756       ierr = MatRestoreRow(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2757     }
2758     /* This routine will ONLY return MPIAIJ type matrix */
2759     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2760     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2761     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2762     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2763 
2764   } else if (scall == MAT_REUSE_MATRIX){
2765     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2766   } else {
2767     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2768   }
2769 
2770   for (i=0;i<m;i++) {
2771     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2772     I    = i + rstart;
2773     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2774     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2775   }
2776   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2777   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2778   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2779 
2780   PetscFunctionReturn(0);
2781 }
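/*
   Editorial usage sketch for MatMerge() (not part of the original source). It assumes each
   process has assembled a MATSEQAIJ matrix "local" with the same number of columns, and that
   "comm" is the communicator on which the merged matrix should live; error handling follows
   the CHKERRQ convention used throughout this file.

     Mat local,merged;
     ... assemble "local" as a MATSEQAIJ matrix on every process ...
     ierr = MatMerge(comm,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);CHKERRQ(ierr);
     ... use "merged"; note that MatMerge() destroys "local" ...
     ierr = MatDestroy(merged);CHKERRQ(ierr);
*/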
2782 
2783 #undef __FUNCT__
2784 #define __FUNCT__ "MatFileSplit"
2785 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2786 {
2787   PetscErrorCode    ierr;
2788   PetscMPIInt       rank;
2789   PetscInt          m,N,i,rstart,nnz;
2790   size_t            len;
2791   const PetscInt    *indx;
2792   PetscViewer       out;
2793   char              *name;
2794   Mat               B;
2795   const PetscScalar *values;
2796 
2797   PetscFunctionBegin;
2798   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2799   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2800   /* Should this be the type of the diagonal block of A? */
2801   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2802   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2803   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2804   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2805   for (i=0;i<m;i++) {
2806     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2807     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2808     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2809   }
2810   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2811   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2812 
2813   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2814   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2815   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for the ".<rank>" suffix */
2816   sprintf(name,"%s.%d",outfile,rank);
2817   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2818   ierr = PetscFree(name);CHKERRQ(ierr);
2819   ierr = MatView(B,out);CHKERRQ(ierr);
2820   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2821   ierr = MatDestroy(B);CHKERRQ(ierr);
2822   PetscFunctionReturn(0);
2823 }
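/*
   Editorial usage sketch for MatFileSplit() (not part of the original source): each process
   writes its local rows of the parallel matrix A to its own binary file named "<outfile>.<rank>";
   the file name "submat" below is illustrative only.

     ierr = MatFileSplit(A,"submat");CHKERRQ(ierr);
   which creates the files submat.0, submat.1, ..., one per process.
*/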
2824 
2825 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2826 #undef __FUNCT__
2827 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2828 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2829 {
2830   PetscErrorCode       ierr;
2831   Mat_Merge_SeqsToMPI  *merge;
2832   PetscObjectContainer container;
2833 
2834   PetscFunctionBegin;
2835   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2836   if (container) {
2837     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2838     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2839     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2840     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2841     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2842     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2843     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2844     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2845     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2846     if (merge->ci){ierr = PetscFree(merge->ci);CHKERRQ(ierr);}
2847     if (merge->cj){ierr = PetscFree(merge->cj);CHKERRQ(ierr);}
2848     if (merge->C_seq){ierr = MatDestroy(merge->C_seq);CHKERRQ(ierr);}
2849 
2850     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2851     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2852   }
2853   ierr = PetscFree(merge);CHKERRQ(ierr);
2854 
2855   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2856   PetscFunctionReturn(0);
2857 }
2858 
2859 #include "src/mat/utils/freespace.h"
2860 #include "petscbt.h"
2861 #undef __FUNCT__
2862 #define __FUNCT__ "MatMerge_SeqsToMPI"
2863 /*@C
2864       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2865                  matrices from each processor
2866 
2867     Collective on MPI_Comm
2868 
2869    Input Parameters:
2870 +    comm - the communicator the parallel matrix will live on
2871 .    seqmat - the input sequential matrix (one per process)
2872 .    m - number of local rows (or PETSC_DECIDE)
2873 .    n - number of local columns (or PETSC_DECIDE)
2874 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2875 
2876    Output Parameter:
2877 .    mpimat - the parallel matrix generated
2878 
2879     Level: advanced
2880 
2881    Notes:
2882      The dimensions of the sequential matrix MUST be the same on each processor.
2883      The input seqmat is kept in the composed container "MatMergeSeqsToMPI" and is
2884      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. A usage sketch follows the driver routine MatMerge_SeqsToMPI() below.
2885 @*/
2886 static PetscEvent logkey_seqstompinum = 0;
2887 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2888 {
2889   PetscErrorCode       ierr;
2890   MPI_Comm             comm=mpimat->comm;
2891   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2892   PetscMPIInt          size,rank,taga,*len_s;
2893   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2894   PetscInt             proc,m;
2895   PetscInt             **buf_ri,**buf_rj;
2896   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2897   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2898   MPI_Request          *s_waits,*r_waits;
2899   MPI_Status           *status;
2900   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2901   Mat_Merge_SeqsToMPI  *merge;
2902   PetscObjectContainer container;
2903 
2904   PetscFunctionBegin;
2905   if (!logkey_seqstompinum) {
2906     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2907   }
2908   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2909 
2910   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2911   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2912 
2913   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2914   if (container) {
2915     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2916   } else SETERRQ(PETSC_ERR_ARG_WRONG,"Matrix was not created by MatMerge_SeqsToMPISymbolic()");
2917   bi     = merge->bi;
2918   bj     = merge->bj;
2919   buf_ri = merge->buf_ri;
2920   buf_rj = merge->buf_rj;
2921 
2922   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2923   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2924   len_s  = merge->len_s;
2925 
2926   /* send and recv matrix values */
2927   /*-----------------------------*/
2928   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2929   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2930 
2931   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2932   for (proc=0,k=0; proc<size; proc++){
2933     if (!len_s[proc]) continue;
2934     i = owners[proc];
2935     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2936     k++;
2937   }
2938 
2939   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2940   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2941   ierr = PetscFree(status);CHKERRQ(ierr);
2942 
2943   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2944   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2945 
2946   /* insert mat values of mpimat */
2947   /*----------------------------*/
2948   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2949   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2950   nextrow = buf_ri_k + merge->nrecv;
2951   nextai  = nextrow + merge->nrecv;
2952 
2953   for (k=0; k<merge->nrecv; k++){
2954     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
2955     nrows = *(buf_ri_k[k]);
2956     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
2957     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
2958   }
2959 
2960   /* set values of ba */
2961   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2962   for (i=0; i<m; i++) {
2963     arow = owners[rank] + i;
2964     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2965     bnzi = bi[i+1] - bi[i];
2966     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2967 
2968     /* add local non-zero vals of this proc's seqmat into ba */
2969     anzi = ai[arow+1] - ai[arow];
2970     aj   = a->j + ai[arow];
2971     aa   = a->a + ai[arow];
2972     nextaj = 0;
2973     for (j=0; nextaj<anzi; j++){
2974       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2975         ba_i[j] += aa[nextaj++];
2976       }
2977     }
2978 
2979     /* add received vals into ba */
2980     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2981       /* i-th row */
2982       if (i == *nextrow[k]) {
2983         anzi = *(nextai[k]+1) - *nextai[k];
2984         aj   = buf_rj[k] + *(nextai[k]);
2985         aa   = abuf_r[k] + *(nextai[k]);
2986         nextaj = 0;
2987         for (j=0; nextaj<anzi; j++){
2988           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2989             ba_i[j] += aa[nextaj++];
2990           }
2991         }
2992         nextrow[k]++; nextai[k]++;
2993       }
2994     }
2995     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
2996   }
2997   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2998   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2999 
3000   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3001   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3002   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3003   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3004   PetscFunctionReturn(0);
3005 }
3006 static PetscEvent logkey_seqstompisym = 0;
3007 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3008 {
3009   PetscErrorCode       ierr;
3010   Mat                  B_mpi;
3011   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3012   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3013   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3014   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3015   PetscInt             len,proc,*dnz,*onz;
3016   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3017   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3018   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3019   MPI_Status           *status;
3020   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3021   PetscBT              lnkbt;
3022   Mat_Merge_SeqsToMPI  *merge;
3023   PetscObjectContainer container;
3024 
3025   PetscFunctionBegin;
3026   if (!logkey_seqstompisym) {
3027     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3028   }
3029   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3030 
3031   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3032   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3033 
3034   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3035   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3036 
3037   /* determine row ownership */
3038   /*---------------------------------------------------------*/
3039   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3040   if (m == PETSC_DECIDE) {
3041     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3042   } else {
3043     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3044   }
3045   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3046   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3047   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3048 
3049   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3050   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3051 
3052   /* determine the number of messages to send, their lengths */
3053   /*---------------------------------------------------------*/
3054   len_s  = merge->len_s;
3055 
3056   len = 0;  /* length of buf_si[] */
3057   merge->nsend = 0;
3058   for (proc=0; proc<size; proc++){
3059     len_si[proc] = 0;
3060     if (proc == rank){
3061       len_s[proc] = 0;
3062     } else {
3063       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3064       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros in the rows owned by [proc] to be sent */
3065     }
3066     if (len_s[proc]) {
3067       merge->nsend++;
3068       nrows = 0;
3069       for (i=owners[proc]; i<owners[proc+1]; i++){
3070         if (ai[i+1] > ai[i]) nrows++;
3071       }
3072       len_si[proc] = 2*(nrows+1);
3073       len += len_si[proc];
3074     }
3075   }
3076 
3077   /* determine the number and length of messages to receive for ij-structure */
3078   /*-------------------------------------------------------------------------*/
3079   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3080   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3081 
3082   /* post the Irecv of j-structure */
3083   /*-------------------------------*/
3084   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3085   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3086 
3087   /* post the Isend of j-structure */
3088   /*--------------------------------*/
3089   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3090   sj_waits = si_waits + merge->nsend;
3091 
3092   for (proc=0, k=0; proc<size; proc++){
3093     if (!len_s[proc]) continue;
3094     i = owners[proc];
3095     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3096     k++;
3097   }
3098 
3099   /* receives and sends of j-structure are complete */
3100   /*------------------------------------------------*/
3101   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3102   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3103 
3104   /* send and recv i-structure */
3105   /*---------------------------*/
3106   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3107   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3108 
3109   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3110   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3111   for (proc=0,k=0; proc<size; proc++){
3112     if (!len_s[proc]) continue;
3113     /* form outgoing message for i-structure:
3114          buf_si[0]:                 nrows to be sent
3115                [1:nrows]:           row index (local to the receiving process [proc])
3116                [nrows+1:2*nrows+1]: i-structure index (running count of nonzeros)
3117     */
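    /* Editorial example (not in the original source): with two nonempty rows whose local
       indices are r0 and r1, carrying 3 and 2 nonzeros respectively, the outgoing message is
       buf_si = {2, r0, r1, 0, 3, 5}, i.e. len_si[proc] = 2*(nrows+1) = 6 entries. */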
3118     /*-------------------------------------------*/
3119     nrows = len_si[proc]/2 - 1;
3120     buf_si_i    = buf_si + nrows+1;
3121     buf_si[0]   = nrows;
3122     buf_si_i[0] = 0;
3123     nrows = 0;
3124     for (i=owners[proc]; i<owners[proc+1]; i++){
3125       anzi = ai[i+1] - ai[i];
3126       if (anzi) {
3127         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3128         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3129         nrows++;
3130       }
3131     }
3132     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3133     k++;
3134     buf_si += len_si[proc];
3135   }
3136 
3137   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3138   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3139 
3140   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3141   for (i=0; i<merge->nrecv; i++){
3142     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3143   }
3144 
3145   ierr = PetscFree(len_si);CHKERRQ(ierr);
3146   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3147   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3148   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3149   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3150   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3151   ierr = PetscFree(status);CHKERRQ(ierr);
3152 
3153   /* compute a local seq matrix in each processor */
3154   /*----------------------------------------------*/
3155   /* allocate bi array and free space for accumulating nonzero column info */
3156   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3157   bi[0] = 0;
3158 
3159   /* create and initialize a linked list */
3160   nlnk = N+1;
3161   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3162 
3163   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3164   /* len = number of nonzeros in the locally owned rows of seqmat */
3165   len  = ai[owners[rank+1]] - ai[owners[rank]];
3166   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3167   current_space = free_space;
3168 
3169   /* determine symbolic info for each local row */
3170   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3171   nextrow = buf_ri_k + merge->nrecv;
3172   nextai  = nextrow + merge->nrecv;
3173   for (k=0; k<merge->nrecv; k++){
3174     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3175     nrows = *buf_ri_k[k];
3176     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3177     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th recved i-structure  */
3178   }
3179 
3180   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3181   len = 0;
3182   for (i=0;i<m;i++) {
3183     bnzi   = 0;
3184     /* add local non-zero cols of this proc's seqmat into lnk */
3185     arow   = owners[rank] + i;
3186     anzi   = ai[arow+1] - ai[arow];
3187     aj     = a->j + ai[arow];
3188     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3189     bnzi += nlnk;
3190     /* add received col data into lnk */
3191     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3192       if (i == *nextrow[k]) { /* i-th row */
3193         anzi = *(nextai[k]+1) - *nextai[k];
3194         aj   = buf_rj[k] + *nextai[k];
3195         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3196         bnzi += nlnk;
3197         nextrow[k]++; nextai[k]++;
3198       }
3199     }
3200     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3201 
3202     /* if free space is not available, make more free space */
3203     if (current_space->local_remaining<bnzi) {
3204       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3205       nspacedouble++;
3206     }
3207     /* copy data into free space, then initialize lnk */
3208     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3209     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3210 
3211     current_space->array           += bnzi;
3212     current_space->local_used      += bnzi;
3213     current_space->local_remaining -= bnzi;
3214 
3215     bi[i+1] = bi[i] + bnzi;
3216   }
3217 
3218   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3219 
3220   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3221   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3222   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3223 
3224   /* create symbolic parallel matrix B_mpi */
3225   /*---------------------------------------*/
3226   if (n==PETSC_DECIDE) {
3227     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3228   } else {
3229     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3230   }
3231   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3232   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3233   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3234 
3235   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3236   B_mpi->assembled     = PETSC_FALSE;
3237   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3238   merge->bi            = bi;
3239   merge->bj            = bj;
3240   merge->buf_ri        = buf_ri;
3241   merge->buf_rj        = buf_rj;
3242   merge->C_seq         = seqmat;
3243   merge->ci            = PETSC_NULL;
3244   merge->cj            = PETSC_NULL;
3245 
3246   /* attach the supporting struct to B_mpi for reuse */
3247   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3248   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3249   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3250   *mpimat = B_mpi;
3251   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3252   PetscFunctionReturn(0);
3253 }
3254 
3255 static PetscEvent logkey_seqstompi = 0;
3256 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3257 {
3258   PetscErrorCode   ierr;
3259 
3260   PetscFunctionBegin;
3261   if (!logkey_seqstompi) {
3262     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3263   }
3264   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3265   if (scall == MAT_INITIAL_MATRIX){
3266     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3267   }
3268   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3269   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3270   PetscFunctionReturn(0);
3271 }
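/*
   Editorial usage sketch for MatMerge_SeqsToMPI() (not part of the original source). The names
   "seqmat" and "C" are illustrative; the first call builds the symbolic structure, and a later
   call with MAT_REUSE_MATRIX only redoes the numeric phase, assuming seqmat keeps the same
   nonzero pattern.

     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... change the numerical values (but not the pattern) of seqmat ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
     ierr = MatDestroy(C);CHKERRQ(ierr);
   Destroying C also destroys seqmat, which is kept in C's container.
*/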
3272 static PetscEvent logkey_getlocalmat = 0;
3273 #undef __FUNCT__
3274 #define __FUNCT__ "MatGetLocalMat"
3275 /*@C
3276      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A (both the diagonal and off-diagonal parts)
3277 
3278     Not Collective
3279 
3280    Input Parameters:
3281 +    A - the matrix
3282 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3283 
3284    Output Parameter:
3285 .    A_loc - the local sequential matrix generated
3286 
3287     Level: developer
3288 
3289 @*/
3290 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3291 {
3292   PetscErrorCode  ierr;
3293   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3294   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3295   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3296   MatScalar       *aa=a->a,*ba=b->a;
3297   PetscInt        am=A->m,i,j,*nnz,nnz_max,ncols,*cols,cstart=mpimat->cstart;
3298   Mat             Aw;
3299 
3300   PetscFunctionBegin;
3301   if (!logkey_getlocalmat) {
3302     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3303   }
3304   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3305   if (scall == MAT_INITIAL_MATRIX){
3306     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&nnz);CHKERRQ(ierr);
3307     nnz_max = 0;
3308     for (i=0; i<am; i++){
3309       nnz[i] = (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3310       if (nnz[i] > nnz_max) nnz_max = nnz[i];
3311     }
3312     ierr = MatCreate(PETSC_COMM_SELF,am,A->N,am,A->N,&Aw);CHKERRQ(ierr);
3313     ierr = MatSetType(Aw,MATSEQAIJ);CHKERRQ(ierr);
3314     ierr = MatSeqAIJSetPreallocation(Aw,0,nnz);CHKERRQ(ierr);
3315   } else if (scall == MAT_REUSE_MATRIX){
3316     Aw = *A_loc;
3317     mat=(Mat_SeqAIJ*)Aw->data;
3318     nnz_max = 0;
3319     for (i=0; i<am; i++){
3320       j = mat->i[i+1] - mat->i[i];
3321       if (j > nnz_max) nnz_max = j;
3322     }
3323     /* clear old values in *A_loc */
3324     ierr = PetscMemzero(mat->a,mat->i[am]*sizeof(MatScalar));CHKERRQ(ierr);
3325   } else {
3326     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3327   }
3328   ierr = PetscMalloc((1+nnz_max)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3329 
3330   for (i=0; i<am; i++) {
3331     /* diagonal portion of A */
3332     ncols = ai[i+1] - ai[i];
3333     for (j=0; j<ncols; j++) {cols[j] = cstart + *aj++;}
3334     ierr = MatSetValues(Aw,1,&i,ncols,cols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
3335     /* off-diagonal portion of A */
3336     ncols = bi[i+1] - bi[i];
3337     for (j=0; j<ncols; j++) cols[j] = cmap[*bj++]; /* cmap[*(bj+bi[i]+j)]; */
3338     ierr = MatSetValues(Aw,1,&i,ncols,cols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
3339   }
3340   ierr = MatAssemblyBegin(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3341   ierr = MatAssemblyEnd(Aw,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3342   ierr = PetscFree(cols);CHKERRQ(ierr);
3343   if (scall == MAT_INITIAL_MATRIX){
3344     ierr = PetscFree(nnz);CHKERRQ(ierr);
3345     *A_loc = Aw;
3346   }
3347 
3348   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3349   PetscFunctionReturn(0);
3350 }
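/*
   Editorial usage sketch for MatGetLocalMat() (not part of the original source): gathers the
   diagonal and off-diagonal parts of the local rows of the MPIAIJ matrix A into one sequential
   matrix; the MAT_REUSE_MATRIX call assumes the nonzero pattern of A has not changed.

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... A changes numerically ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/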
3351 
3352 static PetscEvent logkey_getlocalmatcondensed = 0;
3353 #undef __FUNCT__
3354 #define __FUNCT__ "MatGetLocalMatCondensed"
3355 /*@C
3356      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A, keeping only the NON-ZERO columns
3357 
3358     Not Collective
3359 
3360    Input Parameters:
3361 +    A - the matrix
3362 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3363 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3364 
3365    Output Parameter:
3366 .    A_loc - the local sequential matrix generated
3367 
3368     Level: developer
3369 
3370 @*/
3371 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3372 {
3373   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3374   PetscErrorCode    ierr;
3375   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3376   IS                isrowa,iscola;
3377   Mat               *aloc;
3378 
3379   PetscFunctionBegin;
3380   if (!logkey_getlocalmatcondensed) {
3381     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3382   }
3383   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3384   if (!row){
3385     start = a->rstart; end = a->rend;
3386     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3387   } else {
3388     isrowa = *row;
3389   }
3390   if (!col){
3391     start = a->cstart;
3392     cmap  = a->garray;
3393     nzA   = a->A->n;
3394     nzB   = a->B->n;
3395     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3396     ncols = 0;
3397     for (i=0; i<nzB; i++) {
3398       if (cmap[i] < start) idx[ncols++] = cmap[i];
3399       else break;
3400     }
3401     imark = i;
3402     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3403     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3404     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3405     ierr = PetscFree(idx);CHKERRQ(ierr);
3406   } else {
3407     iscola = *col;
3408   }
3409   if (scall != MAT_INITIAL_MATRIX){
3410     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3411     aloc[0] = *A_loc;
3412   }
3413   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3414   *A_loc = aloc[0];
3415   ierr = PetscFree(aloc);CHKERRQ(ierr);
3416   if (!row){
3417     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3418   }
3419   if (!col){
3420     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3421   }
3422   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3423   PetscFunctionReturn(0);
3424 }
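/*
   Editorial usage sketch for MatGetLocalMatCondensed() (not part of the original source):
   passing PETSC_NULL for the row and column index sets selects all local rows and only the
   columns in which the local part of A has nonzeros.

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/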
3425 
3426 static PetscEvent logkey_GetBrowsOfAcols = 0;
3427 #undef __FUNCT__
3428 #define __FUNCT__ "MatGetBrowsOfAcols"
3429 /*@C
3430     MatGetBrowsOfAcols - Creates a SeqAIJ matrix containing the rows of B that correspond to the nonzero columns of local A
3431 
3432     Collective on Mat
3433 
3434    Input Parameters:
3435 +    A,B - the matrices in mpiaij format
3436 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3437 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3438 
3439    Output Parameter:
3440 +    rowb, colb - index sets of rows and columns of B to extract
3441 .    brstart - row index of B_seq at which the next B->m rows, taken from B's own local rows, begin
3442 -    B_seq - the sequential matrix generated
3443 
3444     Level: developer
3445 
3446 @*/
3447 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3448 {
3449   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3450   PetscErrorCode    ierr;
3451   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3452   IS                isrowb,iscolb;
3453   Mat               *bseq;
3454 
3455   PetscFunctionBegin;
3456   if (a->cstart != b->rstart || a->cend != b->rend){
3457     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3458   }
3459   if (!logkey_GetBrowsOfAcols) {
3460     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3461   }
3462   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3463 
3464   if (scall == MAT_INITIAL_MATRIX){
3465     start = a->cstart;
3466     cmap  = a->garray;
3467     nzA   = a->A->n;
3468     nzB   = a->B->n;
3469     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3470     ncols = 0;
3471     for (i=0; i<nzB; i++) {  /* row < local row index */
3472       if (cmap[i] < start) idx[ncols++] = cmap[i];
3473       else break;
3474     }
3475     imark = i;
3476     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3477     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3478     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3479     ierr = PetscFree(idx);CHKERRQ(ierr);
3480     *brstart = imark;
3481     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3482   } else {
3483     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3484     isrowb = *rowb; iscolb = *colb;
3485     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3486     bseq[0] = *B_seq;
3487   }
3488   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3489   *B_seq = bseq[0];
3490   ierr = PetscFree(bseq);CHKERRQ(ierr);
3491   if (!rowb){
3492     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3493   } else {
3494     *rowb = isrowb;
3495   }
3496   if (!colb){
3497     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3498   } else {
3499     *colb = iscolb;
3500   }
3501   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3502   PetscFunctionReturn(0);
3503 }
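/*
   Editorial usage sketch for MatGetBrowsOfAcols() (not part of the original source). The index
   sets created on the first call are returned through rowb and colb so they can be reused with
   MAT_REUSE_MATRIX; the caller is assumed to destroy them and B_seq when done.

     Mat      B_seq;
     IS       rowb,colb;
     PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... B changes numerically, same nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/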
3504 
3505 static PetscEvent logkey_GetBrowsOfAocols = 0;
3506 #undef __FUNCT__
3507 #define __FUNCT__ "MatGetBrowsOfAoCols"
3508 /*@C
3509     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix containing the rows of B that correspond to the nonzero columns
3510     of the OFF-DIAGONAL portion of local A
3511 
3512     Collective on Mat
3513 
3514    Input Parameters:
3515 +    A,B - the matrices in mpiaij format
3516 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3517 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3518 
3519    Output Parameter:
3520 +    rowb, colb - index sets of rows and columns of B to extract
3521 .    brstart - row index of B_seq at which the rows corresponding to A's columns at or beyond cend begin
3522 -    B_seq - the sequential matrix generated
3523 
3524     Level: developer
3525 
3526 @*/
3527 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3528 {
3529   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3530   PetscErrorCode    ierr;
3531   PetscInt          *idx,i,start,ncols,nzB,*cmap;
3532   IS                isrowb,iscolb;
3533   Mat               *bseq;
3534 
3535   PetscFunctionBegin;
3536   if (a->cstart != b->rstart || a->cend != b->rend){
3537     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3538   }
3539   if (!logkey_GetBrowsOfAocols) {
3540     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrowsOfAoCols",MAT_COOKIE);CHKERRQ(ierr);
3541   }
3542   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3543 
3544   if (scall == MAT_INITIAL_MATRIX){
3545     start = a->cstart;
3546     cmap  = a->garray;
3547     nzB   = a->B->n;
3548     if (!nzB){ /* output a null matrix */
3549       *B_seq = PETSC_NULL; /* return a null matrix through the output argument */
3550       ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3551       PetscFunctionReturn(0);
3552     }
3553     ierr  = PetscMalloc(nzB*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3554     ncols = 0;
3555     *brstart = nzB;
3556     for (i=0; i<nzB; i++) {  /* row < local row index */
3557       if (cmap[i] < start) idx[ncols++] = cmap[i];
3558       else break;
3559     }
3560     *brstart = i;
3561     for (i=*brstart; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3562     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3563     ierr = PetscFree(idx);CHKERRQ(ierr);
3564 
3565     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3566   } else {
3567     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3568     isrowb = *rowb; iscolb = *colb;
3569     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3570     bseq[0] = *B_seq;
3571   }
3572   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3573   *B_seq = bseq[0];
3574   ierr = PetscFree(bseq);CHKERRQ(ierr);
3575   if (!rowb){
3576     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3577   } else {
3578     *rowb = isrowb;
3579   }
3580   if (!colb){
3581     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3582   } else {
3583     *colb = iscolb;
3584   }
3585   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3586   PetscFunctionReturn(0);
3587 }
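/*
   Editorial note (not part of the original source): MatGetBrowsOfAoCols() takes the same
   calling sequence as MatGetBrowsOfAcols() above, but only the columns of the off-diagonal
   portion of local A select rows of B, e.g.

     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/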
3588 
3589 /*MC
3590    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3591 
3592    Options Database Keys:
3593 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3594 
3595   Level: beginner
3596 
3597 .seealso: MatCreateMPIAIJ
3598 M*/
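/*
   Editorial creation sketch for MATMPIAIJ (not part of the original source), using the calling
   sequences that appear elsewhere in this file; the global sizes M,N and the preallocation
   numbers below are illustrative only.

     Mat A;
     ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...
*/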
3599 
3600 EXTERN_C_BEGIN
3601 #undef __FUNCT__
3602 #define __FUNCT__ "MatCreate_MPIAIJ"
3603 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3604 {
3605   Mat_MPIAIJ     *b;
3606   PetscErrorCode ierr;
3607   PetscInt       i;
3608   PetscMPIInt    size;
3609 
3610   PetscFunctionBegin;
3611   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3612 
3613   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3614   B->data         = (void*)b;
3615   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3616   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3617   B->factor       = 0;
3618   B->assembled    = PETSC_FALSE;
3619   B->mapping      = 0;
3620 
3621   B->insertmode      = NOT_SET_VALUES;
3622   b->size            = size;
3623   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3624 
3625   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3626   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3627 
3628   /* the information in the maps duplicates the information computed below, eventually
3629      we should remove the duplicate information that is not contained in the maps */
3630   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3631   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3632 
3633   /* build local table of row and column ownerships */
3634   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3635   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3636   b->cowners = b->rowners + b->size + 2;
3637   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3638   b->rowners[0] = 0;
3639   for (i=2; i<=b->size; i++) {
3640     b->rowners[i] += b->rowners[i-1];
3641   }
3642   b->rstart = b->rowners[b->rank];
3643   b->rend   = b->rowners[b->rank+1];
3644   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3645   b->cowners[0] = 0;
3646   for (i=2; i<=b->size; i++) {
3647     b->cowners[i] += b->cowners[i-1];
3648   }
3649   b->cstart = b->cowners[b->rank];
3650   b->cend   = b->cowners[b->rank+1];
3651 
3652   /* build cache for off array entries formed */
3653   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3654   b->donotstash  = PETSC_FALSE;
3655   b->colmap      = 0;
3656   b->garray      = 0;
3657   b->roworiented = PETSC_TRUE;
3658 
3659   /* stuff used for matrix vector multiply */
3660   b->lvec      = PETSC_NULL;
3661   b->Mvctx     = PETSC_NULL;
3662 
3663   /* stuff for MatGetRow() */
3664   b->rowindices   = 0;
3665   b->rowvalues    = 0;
3666   b->getrowactive = PETSC_FALSE;
3667 
3668   /* Explicitly create 2 MATSEQAIJ matrices. */
3669   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3670   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3671   PetscLogObjectParent(B,b->A);
3672   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3673   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3674   PetscLogObjectParent(B,b->B);
3675 
3676   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3677                                      "MatStoreValues_MPIAIJ",
3678                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3679   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3680                                      "MatRetrieveValues_MPIAIJ",
3681                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3682   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3683 				     "MatGetDiagonalBlock_MPIAIJ",
3684                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3685   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3686 				     "MatIsTranspose_MPIAIJ",
3687 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3688   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3689 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3690 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3691   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3692 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3693 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3694   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3695 				     "MatDiagonalScaleLocal_MPIAIJ",
3696 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3697   PetscFunctionReturn(0);
3698 }
3699 EXTERN_C_END
3700