xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6ad4291fb3cf799a408e09ec04611269bedac2ad)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9 a slightly higher hash table cost; without it it is not scalable (each processor
10 has an order N integer array but is fast to acess.
11 */
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (PetscInt*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (PetscInt*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar    value;
190   PetscErrorCode ierr;
191   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
192   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth     roworiented = aij->roworiented;
194 
195   /* Some Variables required in the macro */
196   Mat            A = aij->A;
197   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
198   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar    *aa = a->a;
200   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat            B = aij->B;
202   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
203   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar    *ba = b->a;
205 
206   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   PetscInt       nonew = a->nonew,shift=0;
208   PetscScalar    *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
278   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   PetscInt       nstash,reallocs;
322   InsertMode     addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are either in INSERTMODE or ADDMODE */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 #undef __FUNCT__
343 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
344 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
345 {
346   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
347   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
348   PetscErrorCode ierr;
349   PetscMPIInt    n;
350   PetscInt       i,j,rstart,ncols,flg;
351   PetscInt       *row,*col,other_disassembled;
352   PetscScalar    *val;
353   InsertMode     addv = mat->insertmode;
354 
355   PetscFunctionBegin;
356   if (!aij->donotstash) {
357     while (1) {
358       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
359       if (!flg) break;
360 
361       for (i=0; i<n;) {
362         /* Now identify the consecutive vals belonging to the same row */
363         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
364         if (j < n) ncols = j-i;
365         else       ncols = n-i;
366         /* Now assemble all these values with a single function call */
367         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
368         i = j;
369       }
370     }
371     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
372   }
373 
374   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
375   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
376 
377   /* determine if any processor has disassembled, if so we must
378      also disassemble ourselfs, in order that we may reassemble. */
379   /*
380      if nonzero structure of submatrix B cannot change then we know that
381      no processor disassembled thus we can skip this stuff
382   */
383   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
384     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
385     if (mat->was_assembled && !other_disassembled) {
386       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
387       /* reaccess the b because aij->B was changed */
388       b    = (Mat_SeqAIJ *)aij->B->data;
389     }
390   }
391 
392   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
393     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
394   }
395 
396   b->inode.use             = PETSC_FALSE;
397   b->compressedrow.use     = PETSC_TRUE;
398   c->compressedrow.checked = PETSC_FALSE;
399   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
400   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
401 
402   if (aij->rowvalues) {
403     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
404     aij->rowvalues = 0;
405   }
406 
407   /* used by MatAXPY() */
408   a->xtoy = 0; b->xtoy = 0;
409   a->XtoY = 0; b->XtoY = 0;
410 
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
416 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
417 {
418   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
419   PetscErrorCode ierr;
420 
421   PetscFunctionBegin;
422   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
423   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatZeroRows_MPIAIJ"
429 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
430 {
431   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
432   PetscErrorCode ierr;
433   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
434   PetscInt       i,N,*rows,*owners = l->rowners;
435   PetscInt       *nprocs,j,idx,nsends,row;
436   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
437   PetscInt       *rvalues,count,base,slen,*source;
438   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
439   MPI_Comm       comm = A->comm;
440   MPI_Request    *send_waits,*recv_waits;
441   MPI_Status     recv_status,*send_status;
442   IS             istmp;
443   PetscTruth     found;
444 
445   PetscFunctionBegin;
446   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
447   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
448 
449   /*  first count number of contributors to each processor */
450   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
451   ierr   = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
452   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
453   for (i=0; i<N; i++) {
454     idx = rows[i];
455     found = PETSC_FALSE;
456     for (j=0; j<size; j++) {
457       if (idx >= owners[j] && idx < owners[j+1]) {
458         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
459       }
460     }
461     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
462   }
463   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
464 
465   /* inform other processors of number of messages and max length*/
466   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
467 
468   /* post receives:   */
469   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
470   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
471   for (i=0; i<nrecvs; i++) {
472     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
473   }
474 
475   /* do sends:
476       1) starts[i] gives the starting index in svalues for stuff going to
477          the ith processor
478   */
479   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
480   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
481   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
482   starts[0] = 0;
483   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
484   for (i=0; i<N; i++) {
485     svalues[starts[owner[i]]++] = rows[i];
486   }
487   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
488 
489   starts[0] = 0;
490   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
491   count = 0;
492   for (i=0; i<size; i++) {
493     if (nprocs[2*i+1]) {
494       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
495     }
496   }
497   ierr = PetscFree(starts);CHKERRQ(ierr);
498 
499   base = owners[rank];
500 
501   /*  wait on receives */
502   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
503   source = lens + nrecvs;
504   count  = nrecvs; slen = 0;
505   while (count) {
506     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
507     /* unpack receives into our local space */
508     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
509     source[imdex]  = recv_status.MPI_SOURCE;
510     lens[imdex]    = n;
511     slen          += n;
512     count--;
513   }
514   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
515 
516   /* move the data into the send scatter */
517   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
518   count = 0;
519   for (i=0; i<nrecvs; i++) {
520     values = rvalues + i*nmax;
521     for (j=0; j<lens[i]; j++) {
522       lrows[count++] = values[j] - base;
523     }
524   }
525   ierr = PetscFree(rvalues);CHKERRQ(ierr);
526   ierr = PetscFree(lens);CHKERRQ(ierr);
527   ierr = PetscFree(owner);CHKERRQ(ierr);
528   ierr = PetscFree(nprocs);CHKERRQ(ierr);
529 
530   /* actually zap the local rows */
531   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
532   PetscLogObjectParent(A,istmp);
533 
534   /*
535         Zero the required rows. If the "diagonal block" of the matrix
536      is square and the user wishes to set the diagonal we use seperate
537      code so that MatSetValues() is not called for each diagonal allocating
538      new memory, thus calling lots of mallocs and slowing things down.
539 
540        Contributed by: Mathew Knepley
541   */
542   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
543   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
544   if (diag && (l->A->M == l->A->N)) {
545     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
546   } else if (diag) {
547     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
548     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
549       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
550 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
551     }
552     for (i = 0; i < slen; i++) {
553       row  = lrows[i] + rstart;
554       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
555     }
556     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
557     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558   } else {
559     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
560   }
561   ierr = ISDestroy(istmp);CHKERRQ(ierr);
562   ierr = PetscFree(lrows);CHKERRQ(ierr);
563 
564   /* wait on sends */
565   if (nsends) {
566     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
567     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
568     ierr = PetscFree(send_status);CHKERRQ(ierr);
569   }
570   ierr = PetscFree(send_waits);CHKERRQ(ierr);
571   ierr = PetscFree(svalues);CHKERRQ(ierr);
572 
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatMult_MPIAIJ"
578 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
579 {
580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
581   PetscErrorCode ierr;
582   PetscInt       nt;
583 
584   PetscFunctionBegin;
585   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
586   if (nt != A->n) {
587     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
588   }
589   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
590   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
591   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
592   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
593   PetscFunctionReturn(0);
594 }
595 
596 #undef __FUNCT__
597 #define __FUNCT__ "MatMultAdd_MPIAIJ"
598 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
599 {
600   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
601   PetscErrorCode ierr;
602 
603   PetscFunctionBegin;
604   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
605   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
606   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
607   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
608   PetscFunctionReturn(0);
609 }
610 
611 #undef __FUNCT__
612 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
613 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
614 {
615   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
616   PetscErrorCode ierr;
617 
618   PetscFunctionBegin;
619   /* do nondiagonal part */
620   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
621   /* send it on its way */
622   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
623   /* do local part */
624   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
625   /* receive remote parts: note this assumes the values are not actually */
626   /* inserted in yy until the next line, which is true for my implementation*/
627   /* but is not perhaps always true. */
628   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
629   PetscFunctionReturn(0);
630 }
631 
632 EXTERN_C_BEGIN
633 #undef __FUNCT__
634 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
635 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f)
636 {
637   MPI_Comm       comm;
638   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
639   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
640   IS             Me,Notme;
641   PetscErrorCode ierr;
642   PetscInt       M,N,first,last,*notme,i;
643   PetscMPIInt    size;
644 
645   PetscFunctionBegin;
646 
647   /* Easy test: symmetric diagonal block */
648   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
649   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
650   if (!*f) PetscFunctionReturn(0);
651   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
652   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
653   if (size == 1) PetscFunctionReturn(0);
654 
655   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
656   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
657   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
658   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
659   for (i=0; i<first; i++) notme[i] = i;
660   for (i=last; i<M; i++) notme[i-last+first] = i;
661   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
662   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
663   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
664   Aoff = Aoffs[0];
665   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
666   Boff = Boffs[0];
667   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
668   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
669   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
670   ierr = ISDestroy(Me);CHKERRQ(ierr);
671   ierr = ISDestroy(Notme);CHKERRQ(ierr);
672 
673   PetscFunctionReturn(0);
674 }
675 EXTERN_C_END
676 
677 #undef __FUNCT__
678 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
679 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
680 {
681   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
682   PetscErrorCode ierr;
683 
684   PetscFunctionBegin;
685   /* do nondiagonal part */
686   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
687   /* send it on its way */
688   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
689   /* do local part */
690   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
691   /* receive remote parts: note this assumes the values are not actually */
692   /* inserted in yy until the next line, which is true for my implementation*/
693   /* but is not perhaps always true. */
694   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
695   PetscFunctionReturn(0);
696 }
697 
698 /*
699   This only works correctly for square matrices where the subblock A->A is the
700    diagonal block
701 */
702 #undef __FUNCT__
703 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
704 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
705 {
706   PetscErrorCode ierr;
707   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
708 
709   PetscFunctionBegin;
710   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
711   if (a->rstart != a->cstart || a->rend != a->cend) {
712     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
713   }
714   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
715   PetscFunctionReturn(0);
716 }
717 
718 #undef __FUNCT__
719 #define __FUNCT__ "MatScale_MPIAIJ"
720 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
721 {
722   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
723   PetscErrorCode ierr;
724 
725   PetscFunctionBegin;
726   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
727   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatDestroy_MPIAIJ"
733 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
734 {
735   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
736   PetscErrorCode ierr;
737 
738   PetscFunctionBegin;
739 #if defined(PETSC_USE_LOG)
740   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
741 #endif
742   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
743   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
744   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
745   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
746 #if defined (PETSC_USE_CTABLE)
747   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
748 #else
749   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
750 #endif
751   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
752   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
753   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
754   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
755   ierr = PetscFree(aij);CHKERRQ(ierr);
756 
757   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
758   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
759   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
760   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
761   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
762   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
763   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 #undef __FUNCT__
768 #define __FUNCT__ "MatView_MPIAIJ_Binary"
769 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
770 {
771   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
772   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
773   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
774   PetscErrorCode    ierr;
775   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
776   int               fd;
777   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
778   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
779   PetscScalar       *column_values;
780 
781   PetscFunctionBegin;
782   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
783   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
784   nz   = A->nz + B->nz;
785   if (!rank) {
786     header[0] = MAT_FILE_COOKIE;
787     header[1] = mat->M;
788     header[2] = mat->N;
789     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
790     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
791     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
792     /* get largest number of rows any processor has */
793     rlen = mat->m;
794     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
795     for (i=1; i<size; i++) {
796       rlen = PetscMax(rlen,range[i+1] - range[i]);
797     }
798   } else {
799     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
800     rlen = mat->m;
801   }
802 
803   /* load up the local row counts */
804   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
805   for (i=0; i<mat->m; i++) {
806     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
807   }
808 
809   /* store the row lengths to the file */
810   if (!rank) {
811     MPI_Status status;
812     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
813     for (i=1; i<size; i++) {
814       rlen = range[i+1] - range[i];
815       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
816       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
817     }
818   } else {
819     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
820   }
821   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
822 
823   /* load up the local column indices */
824   nzmax = nz; /* )th processor needs space a largest processor needs */
825   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
826   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
827   cnt  = 0;
828   for (i=0; i<mat->m; i++) {
829     for (j=B->i[i]; j<B->i[i+1]; j++) {
830       if ( (col = garray[B->j[j]]) > cstart) break;
831       column_indices[cnt++] = col;
832     }
833     for (k=A->i[i]; k<A->i[i+1]; k++) {
834       column_indices[cnt++] = A->j[k] + cstart;
835     }
836     for (; j<B->i[i+1]; j++) {
837       column_indices[cnt++] = garray[B->j[j]];
838     }
839   }
840   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
841 
842   /* store the column indices to the file */
843   if (!rank) {
844     MPI_Status status;
845     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
846     for (i=1; i<size; i++) {
847       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
848       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
849       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
850       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
851     }
852   } else {
853     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
854     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
855   }
856   ierr = PetscFree(column_indices);CHKERRQ(ierr);
857 
858   /* load up the local column values */
859   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
860   cnt  = 0;
861   for (i=0; i<mat->m; i++) {
862     for (j=B->i[i]; j<B->i[i+1]; j++) {
863       if ( garray[B->j[j]] > cstart) break;
864       column_values[cnt++] = B->a[j];
865     }
866     for (k=A->i[i]; k<A->i[i+1]; k++) {
867       column_values[cnt++] = A->a[k];
868     }
869     for (; j<B->i[i+1]; j++) {
870       column_values[cnt++] = B->a[j];
871     }
872   }
873   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
874 
875   /* store the column values to the file */
876   if (!rank) {
877     MPI_Status status;
878     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
879     for (i=1; i<size; i++) {
880       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
881       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
882       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
883       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
884     }
885   } else {
886     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
887     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
888   }
889   ierr = PetscFree(column_values);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
895 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
896 {
897   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
898   PetscErrorCode    ierr;
899   PetscMPIInt       rank = aij->rank,size = aij->size;
900   PetscTruth        isdraw,iascii,flg,isbinary;
901   PetscViewer       sviewer;
902   PetscViewerFormat format;
903 
904   PetscFunctionBegin;
905   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
906   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
907   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
908   if (iascii) {
909     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
910     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
911       MatInfo info;
912       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
913       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
914       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
915       if (flg) {
916         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
917 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
918       } else {
919         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
920 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
921       }
922       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
923       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
924       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
925       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
926       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
927       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
928       PetscFunctionReturn(0);
929     } else if (format == PETSC_VIEWER_ASCII_INFO) {
930       PetscFunctionReturn(0);
931     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
932       PetscFunctionReturn(0);
933     }
934   } else if (isbinary) {
935     if (size == 1) {
936       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
937       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
938     } else {
939       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
940     }
941     PetscFunctionReturn(0);
942   } else if (isdraw) {
943     PetscDraw  draw;
944     PetscTruth isnull;
945     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
946     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
947   }
948 
949   if (size == 1) {
950     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
951     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
952   } else {
953     /* assemble the entire matrix onto first processor. */
954     Mat         A;
955     Mat_SeqAIJ  *Aloc;
956     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
957     PetscScalar *a;
958 
959     if (!rank) {
960       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
961     } else {
962       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
963     }
964     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
965     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
966     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
967     PetscLogObjectParent(mat,A);
968 
969     /* copy over the A part */
970     Aloc = (Mat_SeqAIJ*)aij->A->data;
971     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
972     row = aij->rstart;
973     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
974     for (i=0; i<m; i++) {
975       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
976       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
977     }
978     aj = Aloc->j;
979     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
980 
981     /* copy over the B part */
982     Aloc = (Mat_SeqAIJ*)aij->B->data;
983     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
984     row  = aij->rstart;
985     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
986     ct   = cols;
987     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
988     for (i=0; i<m; i++) {
989       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
990       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
991     }
992     ierr = PetscFree(ct);CHKERRQ(ierr);
993     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
994     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
995     /*
996        Everyone has to call to draw the matrix since the graphics waits are
997        synchronized across all processors that share the PetscDraw object
998     */
999     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1000     if (!rank) {
1001       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1002       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1003     }
1004     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1005     ierr = MatDestroy(A);CHKERRQ(ierr);
1006   }
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatView_MPIAIJ"
1012 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1013 {
1014   PetscErrorCode ierr;
1015   PetscTruth     iascii,isdraw,issocket,isbinary;
1016 
1017   PetscFunctionBegin;
1018   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1019   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1020   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1021   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1022   if (iascii || isdraw || isbinary || issocket) {
1023     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1024   } else {
1025     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1026   }
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 
1031 
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatRelax_MPIAIJ"
1034 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1035 {
1036   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1037   PetscErrorCode ierr;
1038   Vec            bb1;
1039   PetscScalar    mone=-1.0;
1040 
1041   PetscFunctionBegin;
1042   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1043 
1044   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1045 
1046   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1047     if (flag & SOR_ZERO_INITIAL_GUESS) {
1048       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1049       its--;
1050     }
1051 
1052     while (its--) {
1053       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1054       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1055 
1056       /* update rhs: bb1 = bb - B*x */
1057       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1058       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1059 
1060       /* local sweep */
1061       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1062       CHKERRQ(ierr);
1063     }
1064   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1065     if (flag & SOR_ZERO_INITIAL_GUESS) {
1066       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1067       its--;
1068     }
1069     while (its--) {
1070       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1071       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1072 
1073       /* update rhs: bb1 = bb - B*x */
1074       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1075       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1076 
1077       /* local sweep */
1078       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1079       CHKERRQ(ierr);
1080     }
1081   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1082     if (flag & SOR_ZERO_INITIAL_GUESS) {
1083       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1084       its--;
1085     }
1086     while (its--) {
1087       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1088       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1089 
1090       /* update rhs: bb1 = bb - B*x */
1091       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1092       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1093 
1094       /* local sweep */
1095       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1096       CHKERRQ(ierr);
1097     }
1098   } else {
1099     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1100   }
1101 
1102   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1108 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1109 {
1110   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1111   Mat            A = mat->A,B = mat->B;
1112   PetscErrorCode ierr;
1113   PetscReal      isend[5],irecv[5];
1114 
1115   PetscFunctionBegin;
1116   info->block_size     = 1.0;
1117   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1118   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1119   isend[3] = info->memory;  isend[4] = info->mallocs;
1120   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1121   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1122   isend[3] += info->memory;  isend[4] += info->mallocs;
1123   if (flag == MAT_LOCAL) {
1124     info->nz_used      = isend[0];
1125     info->nz_allocated = isend[1];
1126     info->nz_unneeded  = isend[2];
1127     info->memory       = isend[3];
1128     info->mallocs      = isend[4];
1129   } else if (flag == MAT_GLOBAL_MAX) {
1130     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1131     info->nz_used      = irecv[0];
1132     info->nz_allocated = irecv[1];
1133     info->nz_unneeded  = irecv[2];
1134     info->memory       = irecv[3];
1135     info->mallocs      = irecv[4];
1136   } else if (flag == MAT_GLOBAL_SUM) {
1137     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1138     info->nz_used      = irecv[0];
1139     info->nz_allocated = irecv[1];
1140     info->nz_unneeded  = irecv[2];
1141     info->memory       = irecv[3];
1142     info->mallocs      = irecv[4];
1143   }
1144   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1145   info->fill_ratio_needed = 0;
1146   info->factor_mallocs    = 0;
1147   info->rows_global       = (double)matin->M;
1148   info->columns_global    = (double)matin->N;
1149   info->rows_local        = (double)matin->m;
1150   info->columns_local     = (double)matin->N;
1151 
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatSetOption_MPIAIJ"
1157 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1158 {
1159   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163   switch (op) {
1164   case MAT_NO_NEW_NONZERO_LOCATIONS:
1165   case MAT_YES_NEW_NONZERO_LOCATIONS:
1166   case MAT_COLUMNS_UNSORTED:
1167   case MAT_COLUMNS_SORTED:
1168   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1169   case MAT_KEEP_ZEROED_ROWS:
1170   case MAT_NEW_NONZERO_LOCATION_ERR:
1171   case MAT_USE_INODES:
1172   case MAT_DO_NOT_USE_INODES:
1173   case MAT_IGNORE_ZERO_ENTRIES:
1174     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1175     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1176     break;
1177   case MAT_ROW_ORIENTED:
1178     a->roworiented = PETSC_TRUE;
1179     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1180     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1181     break;
1182   case MAT_ROWS_SORTED:
1183   case MAT_ROWS_UNSORTED:
1184   case MAT_YES_NEW_DIAGONALS:
1185     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1186     break;
1187   case MAT_COLUMN_ORIENTED:
1188     a->roworiented = PETSC_FALSE;
1189     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1190     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1191     break;
1192   case MAT_IGNORE_OFF_PROC_ENTRIES:
1193     a->donotstash = PETSC_TRUE;
1194     break;
1195   case MAT_NO_NEW_DIAGONALS:
1196     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1197   case MAT_SYMMETRIC:
1198   case MAT_STRUCTURALLY_SYMMETRIC:
1199   case MAT_HERMITIAN:
1200   case MAT_SYMMETRY_ETERNAL:
1201     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1202     break;
1203   case MAT_NOT_SYMMETRIC:
1204   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1205   case MAT_NOT_HERMITIAN:
1206   case MAT_NOT_SYMMETRY_ETERNAL:
1207     break;
1208   default:
1209     SETERRQ(PETSC_ERR_SUP,"unknown option");
1210   }
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 #undef __FUNCT__
1215 #define __FUNCT__ "MatGetRow_MPIAIJ"
1216 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1217 {
1218   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1219   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1220   PetscErrorCode ierr;
1221   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1222   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1223   PetscInt       *cmap,*idx_p;
1224 
1225   PetscFunctionBegin;
1226   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1227   mat->getrowactive = PETSC_TRUE;
1228 
1229   if (!mat->rowvalues && (idx || v)) {
1230     /*
1231         allocate enough space to hold information from the longest row.
1232     */
1233     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1234     PetscInt     max = 1,tmp;
1235     for (i=0; i<matin->m; i++) {
1236       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1237       if (max < tmp) { max = tmp; }
1238     }
1239     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1240     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1241   }
1242 
1243   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1244   lrow = row - rstart;
1245 
1246   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1247   if (!v)   {pvA = 0; pvB = 0;}
1248   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1249   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1250   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1251   nztot = nzA + nzB;
1252 
1253   cmap  = mat->garray;
1254   if (v  || idx) {
1255     if (nztot) {
1256       /* Sort by increasing column numbers, assuming A and B already sorted */
1257       PetscInt imark = -1;
1258       if (v) {
1259         *v = v_p = mat->rowvalues;
1260         for (i=0; i<nzB; i++) {
1261           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1262           else break;
1263         }
1264         imark = i;
1265         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1266         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1267       }
1268       if (idx) {
1269         *idx = idx_p = mat->rowindices;
1270         if (imark > -1) {
1271           for (i=0; i<imark; i++) {
1272             idx_p[i] = cmap[cworkB[i]];
1273           }
1274         } else {
1275           for (i=0; i<nzB; i++) {
1276             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1277             else break;
1278           }
1279           imark = i;
1280         }
1281         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1282         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1283       }
1284     } else {
1285       if (idx) *idx = 0;
1286       if (v)   *v   = 0;
1287     }
1288   }
1289   *nz = nztot;
1290   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1291   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 #undef __FUNCT__
1296 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1297 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1298 {
1299   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1300 
1301   PetscFunctionBegin;
1302   if (aij->getrowactive == PETSC_FALSE) {
1303     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1304   }
1305   aij->getrowactive = PETSC_FALSE;
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatNorm_MPIAIJ"
1311 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1312 {
1313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1314   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1315   PetscErrorCode ierr;
1316   PetscInt       i,j,cstart = aij->cstart;
1317   PetscReal      sum = 0.0;
1318   PetscScalar    *v;
1319 
1320   PetscFunctionBegin;
1321   if (aij->size == 1) {
1322     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1323   } else {
1324     if (type == NORM_FROBENIUS) {
1325       v = amat->a;
1326       for (i=0; i<amat->nz; i++) {
1327 #if defined(PETSC_USE_COMPLEX)
1328         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1329 #else
1330         sum += (*v)*(*v); v++;
1331 #endif
1332       }
1333       v = bmat->a;
1334       for (i=0; i<bmat->nz; i++) {
1335 #if defined(PETSC_USE_COMPLEX)
1336         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1337 #else
1338         sum += (*v)*(*v); v++;
1339 #endif
1340       }
1341       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1342       *norm = sqrt(*norm);
1343     } else if (type == NORM_1) { /* max column norm */
1344       PetscReal *tmp,*tmp2;
1345       PetscInt    *jj,*garray = aij->garray;
1346       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1347       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1348       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1349       *norm = 0.0;
1350       v = amat->a; jj = amat->j;
1351       for (j=0; j<amat->nz; j++) {
1352         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1353       }
1354       v = bmat->a; jj = bmat->j;
1355       for (j=0; j<bmat->nz; j++) {
1356         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1357       }
1358       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1359       for (j=0; j<mat->N; j++) {
1360         if (tmp2[j] > *norm) *norm = tmp2[j];
1361       }
1362       ierr = PetscFree(tmp);CHKERRQ(ierr);
1363       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1364     } else if (type == NORM_INFINITY) { /* max row norm */
1365       PetscReal ntemp = 0.0;
1366       for (j=0; j<aij->A->m; j++) {
1367         v = amat->a + amat->i[j];
1368         sum = 0.0;
1369         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1370           sum += PetscAbsScalar(*v); v++;
1371         }
1372         v = bmat->a + bmat->i[j];
1373         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1374           sum += PetscAbsScalar(*v); v++;
1375         }
1376         if (sum > ntemp) ntemp = sum;
1377       }
1378       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1379     } else {
1380       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1381     }
1382   }
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 #undef __FUNCT__
1387 #define __FUNCT__ "MatTranspose_MPIAIJ"
1388 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1389 {
1390   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1391   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1392   PetscErrorCode ierr;
1393   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1394   Mat            B;
1395   PetscScalar    *array;
1396 
1397   PetscFunctionBegin;
1398   if (!matout && M != N) {
1399     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1400   }
1401 
1402   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1403   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1404   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1405 
1406   /* copy over the A part */
1407   Aloc = (Mat_SeqAIJ*)a->A->data;
1408   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1409   row = a->rstart;
1410   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1411   for (i=0; i<m; i++) {
1412     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1413     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1414   }
1415   aj = Aloc->j;
1416   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1417 
1418   /* copy over the B part */
1419   Aloc = (Mat_SeqAIJ*)a->B->data;
1420   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1421   row  = a->rstart;
1422   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1423   ct   = cols;
1424   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1425   for (i=0; i<m; i++) {
1426     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1427     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1428   }
1429   ierr = PetscFree(ct);CHKERRQ(ierr);
1430   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1431   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1432   if (matout) {
1433     *matout = B;
1434   } else {
1435     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1436   }
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1442 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1443 {
1444   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1445   Mat            a = aij->A,b = aij->B;
1446   PetscErrorCode ierr;
1447   PetscInt       s1,s2,s3;
1448 
1449   PetscFunctionBegin;
1450   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1451   if (rr) {
1452     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1453     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1454     /* Overlap communication with computation. */
1455     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1456   }
1457   if (ll) {
1458     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1459     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1460     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1461   }
1462   /* scale  the diagonal block */
1463   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1464 
1465   if (rr) {
1466     /* Do a scatter end and then right scale the off-diagonal block */
1467     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1468     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1469   }
1470 
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1477 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1478 {
1479   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1480   PetscErrorCode ierr;
1481 
1482   PetscFunctionBegin;
1483   if (!a->rank) {
1484     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1485   }
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 #undef __FUNCT__
1490 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1491 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1492 {
1493   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1494   PetscErrorCode ierr;
1495 
1496   PetscFunctionBegin;
1497   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1498   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1503 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1504 {
1505   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1506   PetscErrorCode ierr;
1507 
1508   PetscFunctionBegin;
1509   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatEqual_MPIAIJ"
1515 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1516 {
1517   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1518   Mat            a,b,c,d;
1519   PetscTruth     flg;
1520   PetscErrorCode ierr;
1521 
1522   PetscFunctionBegin;
1523   a = matA->A; b = matA->B;
1524   c = matB->A; d = matB->B;
1525 
1526   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1527   if (flg == PETSC_TRUE) {
1528     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1529   }
1530   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatCopy_MPIAIJ"
1536 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1537 {
1538   PetscErrorCode ierr;
1539   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1540   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1541 
1542   PetscFunctionBegin;
1543   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1544   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1545     /* because of the column compression in the off-processor part of the matrix a->B,
1546        the number of columns in a->B and b->B may be different, hence we cannot call
1547        the MatCopy() directly on the two parts. If need be, we can provide a more
1548        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1549        then copying the submatrices */
1550     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1551   } else {
1552     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1553     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1554   }
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1560 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1561 {
1562   PetscErrorCode ierr;
1563 
1564   PetscFunctionBegin;
1565   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1566   PetscFunctionReturn(0);
1567 }
1568 
1569 #include "petscblaslapack.h"
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatAXPY_MPIAIJ"
1572 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1573 {
1574   PetscErrorCode ierr;
1575   PetscInt       i;
1576   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1577   PetscBLASInt   bnz,one=1;
1578   Mat_SeqAIJ     *x,*y;
1579 
1580   PetscFunctionBegin;
1581   if (str == SAME_NONZERO_PATTERN) {
1582     x = (Mat_SeqAIJ *)xx->A->data;
1583     y = (Mat_SeqAIJ *)yy->A->data;
1584     bnz = (PetscBLASInt)x->nz;
1585     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1586     x = (Mat_SeqAIJ *)xx->B->data;
1587     y = (Mat_SeqAIJ *)yy->B->data;
1588     bnz = (PetscBLASInt)x->nz;
1589     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1590   } else if (str == SUBSET_NONZERO_PATTERN) {
1591     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1592 
1593     x = (Mat_SeqAIJ *)xx->B->data;
1594     y = (Mat_SeqAIJ *)yy->B->data;
1595     if (y->xtoy && y->XtoY != xx->B) {
1596       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1597       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1598     }
1599     if (!y->xtoy) { /* get xtoy */
1600       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1601       y->XtoY = xx->B;
1602     }
1603     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1604   } else {
1605     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1606   }
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 /* -------------------------------------------------------------------*/
1611 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1612        MatGetRow_MPIAIJ,
1613        MatRestoreRow_MPIAIJ,
1614        MatMult_MPIAIJ,
1615 /* 4*/ MatMultAdd_MPIAIJ,
1616        MatMultTranspose_MPIAIJ,
1617        MatMultTransposeAdd_MPIAIJ,
1618        0,
1619        0,
1620        0,
1621 /*10*/ 0,
1622        0,
1623        0,
1624        MatRelax_MPIAIJ,
1625        MatTranspose_MPIAIJ,
1626 /*15*/ MatGetInfo_MPIAIJ,
1627        MatEqual_MPIAIJ,
1628        MatGetDiagonal_MPIAIJ,
1629        MatDiagonalScale_MPIAIJ,
1630        MatNorm_MPIAIJ,
1631 /*20*/ MatAssemblyBegin_MPIAIJ,
1632        MatAssemblyEnd_MPIAIJ,
1633        0,
1634        MatSetOption_MPIAIJ,
1635        MatZeroEntries_MPIAIJ,
1636 /*25*/ MatZeroRows_MPIAIJ,
1637        0,
1638        0,
1639        0,
1640        0,
1641 /*30*/ MatSetUpPreallocation_MPIAIJ,
1642        0,
1643        0,
1644        0,
1645        0,
1646 /*35*/ MatDuplicate_MPIAIJ,
1647        0,
1648        0,
1649        0,
1650        0,
1651 /*40*/ MatAXPY_MPIAIJ,
1652        MatGetSubMatrices_MPIAIJ,
1653        MatIncreaseOverlap_MPIAIJ,
1654        MatGetValues_MPIAIJ,
1655        MatCopy_MPIAIJ,
1656 /*45*/ MatPrintHelp_MPIAIJ,
1657        MatScale_MPIAIJ,
1658        0,
1659        0,
1660        0,
1661 /*50*/ MatSetBlockSize_MPIAIJ,
1662        0,
1663        0,
1664        0,
1665        0,
1666 /*55*/ MatFDColoringCreate_MPIAIJ,
1667        0,
1668        MatSetUnfactored_MPIAIJ,
1669        0,
1670        0,
1671 /*60*/ MatGetSubMatrix_MPIAIJ,
1672        MatDestroy_MPIAIJ,
1673        MatView_MPIAIJ,
1674        MatGetPetscMaps_Petsc,
1675        0,
1676 /*65*/ 0,
1677        0,
1678        0,
1679        0,
1680        0,
1681 /*70*/ 0,
1682        0,
1683        MatSetColoring_MPIAIJ,
1684        MatSetValuesAdic_MPIAIJ,
1685        MatSetValuesAdifor_MPIAIJ,
1686 /*75*/ 0,
1687        0,
1688        0,
1689        0,
1690        0,
1691 /*80*/ 0,
1692        0,
1693        0,
1694        0,
1695 /*84*/ MatLoad_MPIAIJ,
1696        0,
1697        0,
1698        0,
1699        0,
1700        0,
1701 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1702        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1703        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1704        MatPtAP_MPIAIJ_MPIAIJ,
1705        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1706 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1707        0,
1708        0,
1709        0};
1710 
1711 /* ----------------------------------------------------------------------------------------*/
1712 
1713 EXTERN_C_BEGIN
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1716 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1717 {
1718   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1719   PetscErrorCode ierr;
1720 
1721   PetscFunctionBegin;
1722   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1723   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1724   PetscFunctionReturn(0);
1725 }
1726 EXTERN_C_END
1727 
1728 EXTERN_C_BEGIN
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1731 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1732 {
1733   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1734   PetscErrorCode ierr;
1735 
1736   PetscFunctionBegin;
1737   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1738   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1739   PetscFunctionReturn(0);
1740 }
1741 EXTERN_C_END
1742 
1743 #include "petscpc.h"
1744 EXTERN_C_BEGIN
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1747 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1748 {
1749   Mat_MPIAIJ     *b;
1750   PetscErrorCode ierr;
1751   PetscInt       i;
1752 
1753   PetscFunctionBegin;
1754   B->preallocated = PETSC_TRUE;
1755   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1756   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1757   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1758   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1759   if (d_nnz) {
1760     for (i=0; i<B->m; i++) {
1761       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1762     }
1763   }
1764   if (o_nnz) {
1765     for (i=0; i<B->m; i++) {
1766       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1767     }
1768   }
1769   b = (Mat_MPIAIJ*)B->data;
1770   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1771   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1772 
1773   PetscFunctionReturn(0);
1774 }
1775 EXTERN_C_END
1776 
1777 /*MC
1778    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1779 
1780    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1781    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1782   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1783   for communicators controlling multiple processes.  It is recommended that you call both of
1784   the above preallocation routines for simplicity.
1785 
1786    Options Database Keys:
1787 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1788 
1789   Level: beginner
1790 
1791 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1792 M*/
1793 
1794 EXTERN_C_BEGIN
1795 #undef __FUNCT__
1796 #define __FUNCT__ "MatCreate_AIJ"
1797 PetscErrorCode MatCreate_AIJ(Mat A)
1798 {
1799   PetscErrorCode ierr;
1800   PetscMPIInt    size;
1801 
1802   PetscFunctionBegin;
1803   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1804   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1805   if (size == 1) {
1806     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1807   } else {
1808     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1809   }
1810   PetscFunctionReturn(0);
1811 }
1812 EXTERN_C_END
1813 
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1816 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1817 {
1818   Mat            mat;
1819   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1820   PetscErrorCode ierr;
1821 
1822   PetscFunctionBegin;
1823   *newmat       = 0;
1824   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1825   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1826   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1827   a    = (Mat_MPIAIJ*)mat->data;
1828 
1829   mat->factor       = matin->factor;
1830   mat->bs           = matin->bs;
1831   mat->assembled    = PETSC_TRUE;
1832   mat->insertmode   = NOT_SET_VALUES;
1833   mat->preallocated = PETSC_TRUE;
1834 
1835   a->rstart       = oldmat->rstart;
1836   a->rend         = oldmat->rend;
1837   a->cstart       = oldmat->cstart;
1838   a->cend         = oldmat->cend;
1839   a->size         = oldmat->size;
1840   a->rank         = oldmat->rank;
1841   a->donotstash   = oldmat->donotstash;
1842   a->roworiented  = oldmat->roworiented;
1843   a->rowindices   = 0;
1844   a->rowvalues    = 0;
1845   a->getrowactive = PETSC_FALSE;
1846 
1847   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1848   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1849   if (oldmat->colmap) {
1850 #if defined (PETSC_USE_CTABLE)
1851     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1852 #else
1853     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1854     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1855     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1856 #endif
1857   } else a->colmap = 0;
1858   if (oldmat->garray) {
1859     PetscInt len;
1860     len  = oldmat->B->n;
1861     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1862     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1863     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1864   } else a->garray = 0;
1865 
1866   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1867   PetscLogObjectParent(mat,a->lvec);
1868   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1869   PetscLogObjectParent(mat,a->Mvctx);
1870   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1871   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1872   PetscLogObjectParent(mat,a->A);
1873   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1874   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1875   PetscLogObjectParent(mat,a->B);
1876   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1877   *newmat = mat;
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 #include "petscsys.h"
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatLoad_MPIAIJ"
1885 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1886 {
1887   Mat            A;
1888   PetscScalar    *vals,*svals;
1889   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1890   MPI_Status     status;
1891   PetscErrorCode ierr;
1892   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1893   PetscInt       i,nz,j,rstart,rend;
1894   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1895   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1896   PetscInt       cend,cstart,n,*rowners;
1897   int            fd;
1898 
1899   PetscFunctionBegin;
1900   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1901   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1902   if (!rank) {
1903     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1904     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1905     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1906   }
1907 
1908   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1909   M = header[1]; N = header[2];
1910   /* determine ownership of all rows */
1911   m    = M/size + ((M % size) > rank);
1912   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1913   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1914   rowners[0] = 0;
1915   for (i=2; i<=size; i++) {
1916     rowners[i] += rowners[i-1];
1917   }
1918   rstart = rowners[rank];
1919   rend   = rowners[rank+1];
1920 
1921   /* distribute row lengths to all processors */
1922   ierr    = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
1923   if (!rank) {
1924     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1925     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1926     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1927     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1928     for (j=0; j<m; j++) {
1929       procsnz[0] += ourlens[j];
1930     }
1931     for (i=1; i<size; i++) {
1932       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1933       /* calculate the number of nonzeros on each processor */
1934       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1935         procsnz[i] += rowlengths[j];
1936       }
1937       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1938     }
1939     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1940   } else {
1941     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1942   }
1943 
1944   if (!rank) {
1945     /* determine max buffer needed and allocate it */
1946     maxnz = 0;
1947     for (i=0; i<size; i++) {
1948       maxnz = PetscMax(maxnz,procsnz[i]);
1949     }
1950     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1951 
1952     /* read in my part of the matrix column indices  */
1953     nz   = procsnz[0];
1954     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1955     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1956 
1957     /* read in every one elses and ship off */
1958     for (i=1; i<size; i++) {
1959       nz   = procsnz[i];
1960       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1961       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1962     }
1963     ierr = PetscFree(cols);CHKERRQ(ierr);
1964   } else {
1965     /* determine buffer space needed for message */
1966     nz = 0;
1967     for (i=0; i<m; i++) {
1968       nz += ourlens[i];
1969     }
1970     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1971 
1972     /* receive message of column indices*/
1973     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1974     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1975     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1976   }
1977 
1978   /* determine column ownership if matrix is not square */
1979   if (N != M) {
1980     n      = N/size + ((N % size) > rank);
1981     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1982     cstart = cend - n;
1983   } else {
1984     cstart = rstart;
1985     cend   = rend;
1986     n      = cend - cstart;
1987   }
1988 
1989   /* loop over local rows, determining number of off diagonal entries */
1990   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1991   jj = 0;
1992   for (i=0; i<m; i++) {
1993     for (j=0; j<ourlens[i]; j++) {
1994       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1995       jj++;
1996     }
1997   }
1998 
1999   /* create our matrix */
2000   for (i=0; i<m; i++) {
2001     ourlens[i] -= offlens[i];
2002   }
2003   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2004   ierr = MatSetType(A,type);CHKERRQ(ierr);
2005   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2006 
2007   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2008   for (i=0; i<m; i++) {
2009     ourlens[i] += offlens[i];
2010   }
2011 
2012   if (!rank) {
2013     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2014 
2015     /* read in my part of the matrix numerical values  */
2016     nz   = procsnz[0];
2017     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2018 
2019     /* insert into matrix */
2020     jj      = rstart;
2021     smycols = mycols;
2022     svals   = vals;
2023     for (i=0; i<m; i++) {
2024       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2025       smycols += ourlens[i];
2026       svals   += ourlens[i];
2027       jj++;
2028     }
2029 
2030     /* read in other processors and ship out */
2031     for (i=1; i<size; i++) {
2032       nz   = procsnz[i];
2033       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2034       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2035     }
2036     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2037   } else {
2038     /* receive numeric values */
2039     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2040 
2041     /* receive message of values*/
2042     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2043     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2044     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2045 
2046     /* insert into matrix */
2047     jj      = rstart;
2048     smycols = mycols;
2049     svals   = vals;
2050     for (i=0; i<m; i++) {
2051       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2052       smycols += ourlens[i];
2053       svals   += ourlens[i];
2054       jj++;
2055     }
2056   }
2057   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2058   ierr = PetscFree(vals);CHKERRQ(ierr);
2059   ierr = PetscFree(mycols);CHKERRQ(ierr);
2060   ierr = PetscFree(rowners);CHKERRQ(ierr);
2061 
2062   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2063   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2064   *newmat = A;
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2070 /*
2071     Not great since it makes two copies of the submatrix, first an SeqAIJ
2072   in local and then by concatenating the local matrices the end result.
2073   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2074 */
2075 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2076 {
2077   PetscErrorCode ierr;
2078   PetscMPIInt    rank,size;
2079   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2080   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2081   Mat            *local,M,Mreuse;
2082   PetscScalar    *vwork,*aa;
2083   MPI_Comm       comm = mat->comm;
2084   Mat_SeqAIJ     *aij;
2085 
2086 
2087   PetscFunctionBegin;
2088   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2089   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2090 
2091   if (call ==  MAT_REUSE_MATRIX) {
2092     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2093     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2094     local = &Mreuse;
2095     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2096   } else {
2097     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2098     Mreuse = *local;
2099     ierr   = PetscFree(local);CHKERRQ(ierr);
2100   }
2101 
2102   /*
2103       m - number of local rows
2104       n - number of columns (same on all processors)
2105       rstart - first row in new global matrix generated
2106   */
2107   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2108   if (call == MAT_INITIAL_MATRIX) {
2109     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2110     ii  = aij->i;
2111     jj  = aij->j;
2112 
2113     /*
2114         Determine the number of non-zeros in the diagonal and off-diagonal
2115         portions of the matrix in order to do correct preallocation
2116     */
2117 
2118     /* first get start and end of "diagonal" columns */
2119     if (csize == PETSC_DECIDE) {
2120       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2121       if (mglobal == n) { /* square matrix */
2122 	nlocal = m;
2123       } else {
2124         nlocal = n/size + ((n % size) > rank);
2125       }
2126     } else {
2127       nlocal = csize;
2128     }
2129     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2130     rstart = rend - nlocal;
2131     if (rank == size - 1 && rend != n) {
2132       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2133     }
2134 
2135     /* next, compute all the lengths */
2136     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2137     olens = dlens + m;
2138     for (i=0; i<m; i++) {
2139       jend = ii[i+1] - ii[i];
2140       olen = 0;
2141       dlen = 0;
2142       for (j=0; j<jend; j++) {
2143         if (*jj < rstart || *jj >= rend) olen++;
2144         else dlen++;
2145         jj++;
2146       }
2147       olens[i] = olen;
2148       dlens[i] = dlen;
2149     }
2150     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2151     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2152     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2153     ierr = PetscFree(dlens);CHKERRQ(ierr);
2154   } else {
2155     PetscInt ml,nl;
2156 
2157     M = *newmat;
2158     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2159     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2160     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2161     /*
2162          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2163        rather than the slower MatSetValues().
2164     */
2165     M->was_assembled = PETSC_TRUE;
2166     M->assembled     = PETSC_FALSE;
2167   }
2168   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2169   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2170   ii  = aij->i;
2171   jj  = aij->j;
2172   aa  = aij->a;
2173   for (i=0; i<m; i++) {
2174     row   = rstart + i;
2175     nz    = ii[i+1] - ii[i];
2176     cwork = jj;     jj += nz;
2177     vwork = aa;     aa += nz;
2178     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2179   }
2180 
2181   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2182   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2183   *newmat = M;
2184 
2185   /* save submatrix used in processor for next request */
2186   if (call ==  MAT_INITIAL_MATRIX) {
2187     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2188     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2189   }
2190 
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 EXTERN_C_BEGIN
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2197 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2198 {
2199   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2200   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2201   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2202   const PetscInt *JJ;
2203   PetscScalar    *values;
2204   PetscErrorCode ierr;
2205 
2206   PetscFunctionBegin;
2207 #if defined(PETSC_OPT_g)
2208   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2209 #endif
2210   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2211   o_nnz = d_nnz + m;
2212 
2213   for (i=0; i<m; i++) {
2214     nnz     = I[i+1]- I[i];
2215     JJ      = J + I[i];
2216     nnz_max = PetscMax(nnz_max,nnz);
2217 #if defined(PETSC_OPT_g)
2218     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2219 #endif
2220     for (j=0; j<nnz; j++) {
2221       if (*JJ >= cstart) break;
2222       JJ++;
2223     }
2224     d = 0;
2225     for (; j<nnz; j++) {
2226       if (*JJ++ >= cend) break;
2227       d++;
2228     }
2229     d_nnz[i] = d;
2230     o_nnz[i] = nnz - d;
2231   }
2232   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2233   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2234 
2235   if (v) values = (PetscScalar*)v;
2236   else {
2237     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2238     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2239   }
2240 
2241   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2242   for (i=0; i<m; i++) {
2243     ii   = i + rstart;
2244     nnz  = I[i+1]- I[i];
2245     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2246   }
2247   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2248   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2249   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2250 
2251   if (!v) {
2252     ierr = PetscFree(values);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 EXTERN_C_END
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2260 /*@C
2261    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2262    (the default parallel PETSc format).
2263 
2264    Collective on MPI_Comm
2265 
2266    Input Parameters:
2267 +  A - the matrix
2268 .  i - the indices into j for the start of each local row (starts with zero)
2269 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2270 -  v - optional values in the matrix
2271 
2272    Level: developer
2273 
2274 .keywords: matrix, aij, compressed row, sparse, parallel
2275 
2276 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2277 @*/
2278 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2279 {
2280   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2281 
2282   PetscFunctionBegin;
2283   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2284   if (f) {
2285     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2286   }
2287   PetscFunctionReturn(0);
2288 }
2289 
2290 #undef __FUNCT__
2291 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2292 /*@C
2293    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2294    (the default parallel PETSc format).  For good matrix assembly performance
2295    the user should preallocate the matrix storage by setting the parameters
2296    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2297    performance can be increased by more than a factor of 50.
2298 
2299    Collective on MPI_Comm
2300 
2301    Input Parameters:
2302 +  A - the matrix
2303 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2304            (same value is used for all local rows)
2305 .  d_nnz - array containing the number of nonzeros in the various rows of the
2306            DIAGONAL portion of the local submatrix (possibly different for each row)
2307            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2308            The size of this array is equal to the number of local rows, i.e 'm'.
2309            You must leave room for the diagonal entry even if it is zero.
2310 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2311            submatrix (same value is used for all local rows).
2312 -  o_nnz - array containing the number of nonzeros in the various rows of the
2313            OFF-DIAGONAL portion of the local submatrix (possibly different for
2314            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2315            structure. The size of this array is equal to the number
2316            of local rows, i.e 'm'.
2317 
2318    If the *_nnz parameter is given then the *_nz parameter is ignored
2319 
2320    The AIJ format (also called the Yale sparse matrix format or
2321    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2322    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2323 
2324    The parallel matrix is partitioned such that the first m0 rows belong to
2325    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2326    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2327 
2328    The DIAGONAL portion of the local submatrix of a processor can be defined
2329    as the submatrix which is obtained by extraction the part corresponding
2330    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2331    first row that belongs to the processor, and r2 is the last row belonging
2332    to the this processor. This is a square mxm matrix. The remaining portion
2333    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2334 
2335    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2336 
2337    Example usage:
2338 
2339    Consider the following 8x8 matrix with 34 non-zero values, that is
2340    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2341    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2342    as follows:
2343 
2344 .vb
2345             1  2  0  |  0  3  0  |  0  4
2346     Proc0   0  5  6  |  7  0  0  |  8  0
2347             9  0 10  | 11  0  0  | 12  0
2348     -------------------------------------
2349            13  0 14  | 15 16 17  |  0  0
2350     Proc1   0 18  0  | 19 20 21  |  0  0
2351             0  0  0  | 22 23  0  | 24  0
2352     -------------------------------------
2353     Proc2  25 26 27  |  0  0 28  | 29  0
2354            30  0  0  | 31 32 33  |  0 34
2355 .ve
2356 
2357    This can be represented as a collection of submatrices as:
2358 
2359 .vb
2360       A B C
2361       D E F
2362       G H I
2363 .ve
2364 
2365    Where the submatrices A,B,C are owned by proc0, D,E,F are
2366    owned by proc1, G,H,I are owned by proc2.
2367 
2368    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2369    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2370    The 'M','N' parameters are 8,8, and have the same values on all procs.
2371 
2372    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2373    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2374    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2375    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2376    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2377    matrix, ans [DF] as another SeqAIJ matrix.
2378 
2379    When d_nz, o_nz parameters are specified, d_nz storage elements are
2380    allocated for every row of the local diagonal submatrix, and o_nz
2381    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2382    One way to choose d_nz and o_nz is to use the max nonzerors per local
2383    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2384    In this case, the values of d_nz,o_nz are:
2385 .vb
2386      proc0 : dnz = 2, o_nz = 2
2387      proc1 : dnz = 3, o_nz = 2
2388      proc2 : dnz = 1, o_nz = 4
2389 .ve
2390    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2391    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2392    for proc3. i.e we are using 12+15+10=37 storage locations to store
2393    34 values.
2394 
2395    When d_nnz, o_nnz parameters are specified, the storage is specified
2396    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2397    In the above case the values for d_nnz,o_nnz are:
2398 .vb
2399      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2400      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2401      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2402 .ve
2403    Here the space allocated is sum of all the above values i.e 34, and
2404    hence pre-allocation is perfect.
2405 
2406    Level: intermediate
2407 
2408 .keywords: matrix, aij, compressed row, sparse, parallel
2409 
2410 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2411           MPIAIJ
2412 @*/
2413 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2414 {
2415   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2416 
2417   PetscFunctionBegin;
2418   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2419   if (f) {
2420     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2421   }
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatCreateMPIAIJ"
2427 /*@C
2428    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2429    (the default parallel PETSc format).  For good matrix assembly performance
2430    the user should preallocate the matrix storage by setting the parameters
2431    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2432    performance can be increased by more than a factor of 50.
2433 
2434    Collective on MPI_Comm
2435 
2436    Input Parameters:
2437 +  comm - MPI communicator
2438 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2439            This value should be the same as the local size used in creating the
2440            y vector for the matrix-vector product y = Ax.
2441 .  n - This value should be the same as the local size used in creating the
2442        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2443        calculated if N is given) For square matrices n is almost always m.
2444 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2445 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2446 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2447            (same value is used for all local rows)
2448 .  d_nnz - array containing the number of nonzeros in the various rows of the
2449            DIAGONAL portion of the local submatrix (possibly different for each row)
2450            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2451            The size of this array is equal to the number of local rows, i.e 'm'.
2452            You must leave room for the diagonal entry even if it is zero.
2453 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2454            submatrix (same value is used for all local rows).
2455 -  o_nnz - array containing the number of nonzeros in the various rows of the
2456            OFF-DIAGONAL portion of the local submatrix (possibly different for
2457            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2458            structure. The size of this array is equal to the number
2459            of local rows, i.e 'm'.
2460 
2461    Output Parameter:
2462 .  A - the matrix
2463 
2464    Notes:
2465    If the *_nnz parameter is given then the *_nz parameter is ignored
2466 
2467    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2468    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2469    storage requirements for this matrix.
2470 
2471    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2472    processor than it must be used on all processors that share the object for
2473    that argument.
2474 
2475    The user MUST specify either the local or global matrix dimensions
2476    (possibly both).
2477 
2478    The parallel matrix is partitioned such that the first m0 rows belong to
2479    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2480    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2481 
2482    The DIAGONAL portion of the local submatrix of a processor can be defined
2483    as the submatrix which is obtained by extraction the part corresponding
2484    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2485    first row that belongs to the processor, and r2 is the last row belonging
2486    to the this processor. This is a square mxm matrix. The remaining portion
2487    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2488 
2489    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2490 
2491    When calling this routine with a single process communicator, a matrix of
2492    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2493    type of communicator, use the construction mechanism:
2494      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2495 
2496    By default, this format uses inodes (identical nodes) when possible.
2497    We search for consecutive rows with the same nonzero structure, thereby
2498    reusing matrix information to achieve increased efficiency.
2499 
2500    Options Database Keys:
2501 +  -mat_aij_no_inode  - Do not use inodes
2502 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2503 -  -mat_aij_oneindex - Internally use indexing starting at 1
2504         rather than 0.  Note that when calling MatSetValues(),
2505         the user still MUST index entries starting at 0!
2506 
2507 
2508    Example usage:
2509 
2510    Consider the following 8x8 matrix with 34 non-zero values, that is
2511    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2512    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2513    as follows:
2514 
2515 .vb
2516             1  2  0  |  0  3  0  |  0  4
2517     Proc0   0  5  6  |  7  0  0  |  8  0
2518             9  0 10  | 11  0  0  | 12  0
2519     -------------------------------------
2520            13  0 14  | 15 16 17  |  0  0
2521     Proc1   0 18  0  | 19 20 21  |  0  0
2522             0  0  0  | 22 23  0  | 24  0
2523     -------------------------------------
2524     Proc2  25 26 27  |  0  0 28  | 29  0
2525            30  0  0  | 31 32 33  |  0 34
2526 .ve
2527 
2528    This can be represented as a collection of submatrices as:
2529 
2530 .vb
2531       A B C
2532       D E F
2533       G H I
2534 .ve
2535 
2536    Where the submatrices A,B,C are owned by proc0, D,E,F are
2537    owned by proc1, G,H,I are owned by proc2.
2538 
2539    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2540    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2541    The 'M','N' parameters are 8,8, and have the same values on all procs.
2542 
2543    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2544    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2545    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2546    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2547    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2548    matrix, ans [DF] as another SeqAIJ matrix.
2549 
2550    When d_nz, o_nz parameters are specified, d_nz storage elements are
2551    allocated for every row of the local diagonal submatrix, and o_nz
2552    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2553    One way to choose d_nz and o_nz is to use the max nonzerors per local
2554    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2555    In this case, the values of d_nz,o_nz are:
2556 .vb
2557      proc0 : dnz = 2, o_nz = 2
2558      proc1 : dnz = 3, o_nz = 2
2559      proc2 : dnz = 1, o_nz = 4
2560 .ve
2561    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2562    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2563    for proc3. i.e we are using 12+15+10=37 storage locations to store
2564    34 values.
2565 
2566    When d_nnz, o_nnz parameters are specified, the storage is specified
2567    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2568    In the above case the values for d_nnz,o_nnz are:
2569 .vb
2570      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2571      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2572      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2573 .ve
2574    Here the space allocated is sum of all the above values i.e 34, and
2575    hence pre-allocation is perfect.
2576 
2577    Level: intermediate
2578 
2579 .keywords: matrix, aij, compressed row, sparse, parallel
2580 
2581 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2582           MPIAIJ
2583 @*/
2584 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2585 {
2586   PetscErrorCode ierr;
2587   PetscMPIInt    size;
2588 
2589   PetscFunctionBegin;
2590   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2591   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2592   if (size > 1) {
2593     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2594     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2595   } else {
2596     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2597     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2598   }
2599   PetscFunctionReturn(0);
2600 }
2601 
2602 #undef __FUNCT__
2603 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2604 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2605 {
2606   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2607 
2608   PetscFunctionBegin;
2609   *Ad     = a->A;
2610   *Ao     = a->B;
2611   *colmap = a->garray;
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 #undef __FUNCT__
2616 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2617 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2618 {
2619   PetscErrorCode ierr;
2620   PetscInt       i;
2621   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2622 
2623   PetscFunctionBegin;
2624   if (coloring->ctype == IS_COLORING_LOCAL) {
2625     ISColoringValue *allcolors,*colors;
2626     ISColoring      ocoloring;
2627 
2628     /* set coloring for diagonal portion */
2629     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2630 
2631     /* set coloring for off-diagonal portion */
2632     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2633     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2634     for (i=0; i<a->B->n; i++) {
2635       colors[i] = allcolors[a->garray[i]];
2636     }
2637     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2638     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2639     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2640     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2641   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2642     ISColoringValue *colors;
2643     PetscInt             *larray;
2644     ISColoring      ocoloring;
2645 
2646     /* set coloring for diagonal portion */
2647     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2648     for (i=0; i<a->A->n; i++) {
2649       larray[i] = i + a->cstart;
2650     }
2651     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2652     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2653     for (i=0; i<a->A->n; i++) {
2654       colors[i] = coloring->colors[larray[i]];
2655     }
2656     ierr = PetscFree(larray);CHKERRQ(ierr);
2657     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2658     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2659     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2660 
2661     /* set coloring for off-diagonal portion */
2662     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2663     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2664     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2665     for (i=0; i<a->B->n; i++) {
2666       colors[i] = coloring->colors[larray[i]];
2667     }
2668     ierr = PetscFree(larray);CHKERRQ(ierr);
2669     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2670     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2671     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2672   } else {
2673     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2674   }
2675 
2676   PetscFunctionReturn(0);
2677 }
2678 
2679 #undef __FUNCT__
2680 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2681 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2682 {
2683   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2684   PetscErrorCode ierr;
2685 
2686   PetscFunctionBegin;
2687   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2688   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2689   PetscFunctionReturn(0);
2690 }
2691 
2692 #undef __FUNCT__
2693 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2694 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2695 {
2696   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2697   PetscErrorCode ierr;
2698 
2699   PetscFunctionBegin;
2700   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2701   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2702   PetscFunctionReturn(0);
2703 }
2704 
2705 #undef __FUNCT__
2706 #define __FUNCT__ "MatMerge"
2707 /*@C
2708       MatMerge - Creates a single large PETSc matrix by concatinating sequential
2709                  matrices from each processor
2710 
2711     Collective on MPI_Comm
2712 
2713    Input Parameters:
2714 +    comm - the communicators the parallel matrix will live on
2715 .    inmat - the input sequential matrices
2716 .    n - number of local columns (or PETSC_DECIDE)
2717 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2718 
2719    Output Parameter:
2720 .    outmat - the parallel matrix generated
2721 
2722     Level: advanced
2723 
2724    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2725 
2726 @*/
2727 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2728 {
2729   PetscErrorCode ierr;
2730   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2731   PetscInt       *indx;
2732   PetscScalar    *values;
2733   PetscMap       columnmap,rowmap;
2734 
2735   PetscFunctionBegin;
2736     ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2737   /*
2738   PetscMPIInt       rank;
2739   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2740   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2741   */
2742   if (scall == MAT_INITIAL_MATRIX){
2743     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2744     if (n == PETSC_DECIDE){
2745       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2746       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2747       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2748       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2749       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2750     }
2751 
2752     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2753     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2754     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2755     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2756     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2757 
2758     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2759     for (i=0;i<m;i++) {
2760       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2761       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2762       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2763     }
2764     /* This routine will ONLY return MPIAIJ type matrix */
2765     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2766     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2767     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2768     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2769 
2770   } else if (scall == MAT_REUSE_MATRIX){
2771     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2772   } else {
2773     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2774   }
2775 
2776   for (i=0;i<m;i++) {
2777     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2778     I    = i + rstart;
2779     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2780     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2781   }
2782   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2783   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2784   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2785 
2786   PetscFunctionReturn(0);
2787 }
2788 
2789 #undef __FUNCT__
2790 #define __FUNCT__ "MatFileSplit"
2791 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2792 {
2793   PetscErrorCode    ierr;
2794   PetscMPIInt       rank;
2795   PetscInt          m,N,i,rstart,nnz;
2796   size_t            len;
2797   const PetscInt    *indx;
2798   PetscViewer       out;
2799   char              *name;
2800   Mat               B;
2801   const PetscScalar *values;
2802 
2803   PetscFunctionBegin;
2804   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2805   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2806   /* Should this be the type of the diagonal block of A? */
2807   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2808   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2809   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2810   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2811   for (i=0;i<m;i++) {
2812     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2813     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2814     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2815   }
2816   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2817   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2818 
2819   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2820   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2821   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2822   sprintf(name,"%s.%d",outfile,rank);
2823   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2824   ierr = PetscFree(name);
2825   ierr = MatView(B,out);CHKERRQ(ierr);
2826   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2827   ierr = MatDestroy(B);CHKERRQ(ierr);
2828   PetscFunctionReturn(0);
2829 }
2830 
2831 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2832 #undef __FUNCT__
2833 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2834 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2835 {
2836   PetscErrorCode       ierr;
2837   Mat_Merge_SeqsToMPI  *merge;
2838   PetscObjectContainer container;
2839 
2840   PetscFunctionBegin;
2841   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2842   if (container) {
2843     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2844     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2845     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2846     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2847     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2848     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2849     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2850     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2851     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2852     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2853     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2854     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2855 
2856     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2857     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2858   }
2859   ierr = PetscFree(merge);CHKERRQ(ierr);
2860 
2861   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 #include "src/mat/utils/freespace.h"
2866 #include "petscbt.h"
2867 #undef __FUNCT__
2868 #define __FUNCT__ "MatMerge_SeqsToMPI"
2869 /*@C
2870       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
2871                  matrices from each processor
2872 
2873     Collective on MPI_Comm
2874 
2875    Input Parameters:
2876 +    comm - the communicators the parallel matrix will live on
2877 .    seqmat - the input sequential matrices
2878 .    m - number of local rows (or PETSC_DECIDE)
2879 .    n - number of local columns (or PETSC_DECIDE)
2880 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2881 
2882    Output Parameter:
2883 .    mpimat - the parallel matrix generated
2884 
2885     Level: advanced
2886 
2887    Notes:
2888      The dimensions of the sequential matrix in each processor MUST be the same.
2889      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
2890      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2891 @*/
2892 static PetscEvent logkey_seqstompinum = 0;
2893 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2894 {
2895   PetscErrorCode       ierr;
2896   MPI_Comm             comm=mpimat->comm;
2897   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2898   PetscMPIInt          size,rank,taga,*len_s;
2899   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2900   PetscInt             proc,m;
2901   PetscInt             **buf_ri,**buf_rj;
2902   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2903   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2904   MPI_Request          *s_waits,*r_waits;
2905   MPI_Status           *status;
2906   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2907   Mat_Merge_SeqsToMPI  *merge;
2908   PetscObjectContainer container;
2909 
2910   PetscFunctionBegin;
2911   if (!logkey_seqstompinum) {
2912     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
2913   }
2914   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2915 
2916   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2917   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2918 
2919   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2920   if (container) {
2921     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2922   }
2923   bi     = merge->bi;
2924   bj     = merge->bj;
2925   buf_ri = merge->buf_ri;
2926   buf_rj = merge->buf_rj;
2927 
2928   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2929   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2930   len_s  = merge->len_s;
2931 
2932   /* send and recv matrix values */
2933   /*-----------------------------*/
2934   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2935   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2936 
2937   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2938   for (proc=0,k=0; proc<size; proc++){
2939     if (!len_s[proc]) continue;
2940     i = owners[proc];
2941     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2942     k++;
2943   }
2944 
2945   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2946   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2947   ierr = PetscFree(status);CHKERRQ(ierr);
2948 
2949   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2950   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2951 
2952   /* insert mat values of mpimat */
2953   /*----------------------------*/
2954   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2955   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2956   nextrow = buf_ri_k + merge->nrecv;
2957   nextai  = nextrow + merge->nrecv;
2958 
2959   for (k=0; k<merge->nrecv; k++){
2960     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
2961     nrows = *(buf_ri_k[k]);
2962     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
2963     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
2964   }
2965 
2966   /* set values of ba */
2967   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2968   for (i=0; i<m; i++) {
2969     arow = owners[rank] + i;
2970     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2971     bnzi = bi[i+1] - bi[i];
2972     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2973 
2974     /* add local non-zero vals of this proc's seqmat into ba */
2975     anzi = ai[arow+1] - ai[arow];
2976     aj   = a->j + ai[arow];
2977     aa   = a->a + ai[arow];
2978     nextaj = 0;
2979     for (j=0; nextaj<anzi; j++){
2980       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2981         ba_i[j] += aa[nextaj++];
2982       }
2983     }
2984 
2985     /* add received vals into ba */
2986     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2987       /* i-th row */
2988       if (i == *nextrow[k]) {
2989         anzi = *(nextai[k]+1) - *nextai[k];
2990         aj   = buf_rj[k] + *(nextai[k]);
2991         aa   = abuf_r[k] + *(nextai[k]);
2992         nextaj = 0;
2993         for (j=0; nextaj<anzi; j++){
2994           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2995             ba_i[j] += aa[nextaj++];
2996           }
2997         }
2998         nextrow[k]++; nextai[k]++;
2999       }
3000     }
3001     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3002   }
3003   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3004   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3005 
3006   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3007   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3008   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3009   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3010   PetscFunctionReturn(0);
3011 }
3012 static PetscEvent logkey_seqstompisym = 0;
3013 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3014 {
3015   PetscErrorCode       ierr;
3016   Mat                  B_mpi;
3017   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3018   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3019   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3020   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3021   PetscInt             len,proc,*dnz,*onz;
3022   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3023   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3024   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3025   MPI_Status           *status;
3026   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3027   PetscBT              lnkbt;
3028   Mat_Merge_SeqsToMPI  *merge;
3029   PetscObjectContainer container;
3030 
3031   PetscFunctionBegin;
3032   if (!logkey_seqstompisym) {
3033     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3034   }
3035   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3036 
3037   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3038   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3039 
3040   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3041   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3042 
3043   /* determine row ownership */
3044   /*---------------------------------------------------------*/
3045   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3046   if (m == PETSC_DECIDE) {
3047     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3048   } else {
3049     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3050   }
3051   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3052   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3053   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3054 
3055   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3056   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3057 
3058   /* determine the number of messages to send, their lengths */
3059   /*---------------------------------------------------------*/
3060   len_s  = merge->len_s;
3061 
3062   len = 0;  /* length of buf_si[] */
3063   merge->nsend = 0;
3064   for (proc=0; proc<size; proc++){
3065     len_si[proc] = 0;
3066     if (proc == rank){
3067       len_s[proc] = 0;
3068     } else {
3069       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3070       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3071     }
3072     if (len_s[proc]) {
3073       merge->nsend++;
3074       nrows = 0;
3075       for (i=owners[proc]; i<owners[proc+1]; i++){
3076         if (ai[i+1] > ai[i]) nrows++;
3077       }
3078       len_si[proc] = 2*(nrows+1);
3079       len += len_si[proc];
3080     }
3081   }
3082 
3083   /* determine the number and length of messages to receive for ij-structure */
3084   /*-------------------------------------------------------------------------*/
3085   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3086   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3087 
3088   /* post the Irecv of j-structure */
3089   /*-------------------------------*/
3090   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3091   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3092 
3093   /* post the Isend of j-structure */
3094   /*--------------------------------*/
3095   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3096   sj_waits = si_waits + merge->nsend;
3097 
3098   for (proc=0, k=0; proc<size; proc++){
3099     if (!len_s[proc]) continue;
3100     i = owners[proc];
3101     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3102     k++;
3103   }
3104 
3105   /* receives and sends of j-structure are complete */
3106   /*------------------------------------------------*/
3107   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3108   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3109 
3110   /* send and recv i-structure */
3111   /*---------------------------*/
3112   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3113   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3114 
3115   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3116   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3117   for (proc=0,k=0; proc<size; proc++){
3118     if (!len_s[proc]) continue;
3119     /* form outgoing message for i-structure:
3120          buf_si[0]:                 nrows to be sent
3121                [1:nrows]:           row index (global)
3122                [nrows+1:2*nrows+1]: i-structure index
3123     */
3124     /*-------------------------------------------*/
3125     nrows = len_si[proc]/2 - 1;
3126     buf_si_i    = buf_si + nrows+1;
3127     buf_si[0]   = nrows;
3128     buf_si_i[0] = 0;
3129     nrows = 0;
3130     for (i=owners[proc]; i<owners[proc+1]; i++){
3131       anzi = ai[i+1] - ai[i];
3132       if (anzi) {
3133         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3134         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3135         nrows++;
3136       }
3137     }
3138     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3139     k++;
3140     buf_si += len_si[proc];
3141   }
3142 
3143   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3144   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3145 
3146   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3147   for (i=0; i<merge->nrecv; i++){
3148     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3149   }
3150 
3151   ierr = PetscFree(len_si);CHKERRQ(ierr);
3152   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3153   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3154   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3155   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3156   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3157   ierr = PetscFree(status);CHKERRQ(ierr);
3158 
3159   /* compute a local seq matrix in each processor */
3160   /*----------------------------------------------*/
3161   /* allocate bi array and free space for accumulating nonzero column info */
3162   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3163   bi[0] = 0;
3164 
3165   /* create and initialize a linked list */
3166   nlnk = N+1;
3167   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3168 
3169   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3170   len = 0;
3171   len  = ai[owners[rank+1]] - ai[owners[rank]];
3172   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3173   current_space = free_space;
3174 
3175   /* determine symbolic info for each local row */
3176   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3177   nextrow = buf_ri_k + merge->nrecv;
3178   nextai  = nextrow + merge->nrecv;
3179   for (k=0; k<merge->nrecv; k++){
3180     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3181     nrows = *buf_ri_k[k];
3182     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3183     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3184   }
3185 
3186   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3187   len = 0;
3188   for (i=0;i<m;i++) {
3189     bnzi   = 0;
3190     /* add local non-zero cols of this proc's seqmat into lnk */
3191     arow   = owners[rank] + i;
3192     anzi   = ai[arow+1] - ai[arow];
3193     aj     = a->j + ai[arow];
3194     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3195     bnzi += nlnk;
3196     /* add received col data into lnk */
3197     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3198       if (i == *nextrow[k]) { /* i-th row */
3199         anzi = *(nextai[k]+1) - *nextai[k];
3200         aj   = buf_rj[k] + *nextai[k];
3201         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3202         bnzi += nlnk;
3203         nextrow[k]++; nextai[k]++;
3204       }
3205     }
3206     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3207 
3208     /* if free space is not available, make more free space */
3209     if (current_space->local_remaining<bnzi) {
3210       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3211       nspacedouble++;
3212     }
3213     /* copy data into free space, then initialize lnk */
3214     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3215     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3216 
3217     current_space->array           += bnzi;
3218     current_space->local_used      += bnzi;
3219     current_space->local_remaining -= bnzi;
3220 
3221     bi[i+1] = bi[i] + bnzi;
3222   }
3223 
3224   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3225 
3226   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3227   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3228   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3229 
3230   /* create symbolic parallel matrix B_mpi */
3231   /*---------------------------------------*/
3232   if (n==PETSC_DECIDE) {
3233     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3234   } else {
3235     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3236   }
3237   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3238   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3239   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3240 
3241   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3242   B_mpi->assembled     = PETSC_FALSE;
3243   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3244   merge->bi            = bi;
3245   merge->bj            = bj;
3246   merge->buf_ri        = buf_ri;
3247   merge->buf_rj        = buf_rj;
3248   merge->coi           = PETSC_NULL;
3249   merge->coj           = PETSC_NULL;
3250   merge->owners_co     = PETSC_NULL;
3251 
3252   /* attach the supporting struct to B_mpi for reuse */
3253   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3254   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3255   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3256   *mpimat = B_mpi;
3257   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3258   PetscFunctionReturn(0);
3259 }
3260 
3261 static PetscEvent logkey_seqstompi = 0;
3262 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3263 {
3264   PetscErrorCode   ierr;
3265 
3266   PetscFunctionBegin;
3267   if (!logkey_seqstompi) {
3268     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3269   }
3270   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3271   if (scall == MAT_INITIAL_MATRIX){
3272     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3273   }
3274   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3275   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3276   PetscFunctionReturn(0);
3277 }
3278 static PetscEvent logkey_getlocalmat = 0;
3279 #undef __FUNCT__
3280 #define __FUNCT__ "MatGetLocalMat"
3281 /*@C
3282      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3283 
3284     Not Collective
3285 
3286    Input Parameters:
3287 +    A - the matrix
3288 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3289 
3290    Output Parameter:
3291 .    A_loc - the local sequential matrix generated
3292 
3293     Level: developer
3294 
3295 @*/
3296 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3297 {
3298   PetscErrorCode  ierr;
3299   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3300   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3301   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3302   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3303   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3304   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3305 
3306   PetscFunctionBegin;
3307   if (!logkey_getlocalmat) {
3308     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3309   }
3310   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3311   if (scall == MAT_INITIAL_MATRIX){
3312     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3313     ci[0] = 0;
3314     for (i=0; i<am; i++){
3315       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3316     }
3317     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3318     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3319     k = 0;
3320     for (i=0; i<am; i++) {
3321       ncols_o = bi[i+1] - bi[i];
3322       ncols_d = ai[i+1] - ai[i];
3323       /* off-diagonal portion of A */
3324       for (jo=0; jo<ncols_o; jo++) {
3325         col = cmap[*bj];
3326         if (col >= cstart) break;
3327         cj[k]   = col; bj++;
3328         ca[k++] = *ba++;
3329       }
3330       /* diagonal portion of A */
3331       for (j=0; j<ncols_d; j++) {
3332         cj[k]   = cstart + *aj++;
3333         ca[k++] = *aa++;
3334       }
3335       /* off-diagonal portion of A */
3336       for (j=jo; j<ncols_o; j++) {
3337         cj[k]   = cmap[*bj++];
3338         ca[k++] = *ba++;
3339       }
3340     }
3341     /* put together the new matrix */
3342     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3343     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3344     /* Since these are PETSc arrays, change flags to free them as necessary. */
3345     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3346     mat->freedata = PETSC_TRUE;
3347     mat->nonew    = 0;
3348   } else if (scall == MAT_REUSE_MATRIX){
3349     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3350     ci = mat->i; cj = mat->j; ca = mat->a;
3351     for (i=0; i<am; i++) {
3352       /* off-diagonal portion of A */
3353       ncols_o = bi[i+1] - bi[i];
3354       for (jo=0; jo<ncols_o; jo++) {
3355         col = cmap[*bj];
3356         if (col >= cstart) break;
3357         *ca++ = *ba++; bj++;
3358       }
3359       /* diagonal portion of A */
3360       ncols_d = ai[i+1] - ai[i];
3361       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3362       /* off-diagonal portion of A */
3363       for (j=jo; j<ncols_o; j++) {
3364         *ca++ = *ba++; bj++;
3365       }
3366     }
3367   } else {
3368     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3369   }
3370 
3371   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3372   PetscFunctionReturn(0);
3373 }
3374 
3375 static PetscEvent logkey_getlocalmatcondensed = 0;
3376 #undef __FUNCT__
3377 #define __FUNCT__ "MatGetLocalMatCondensed"
3378 /*@C
3379      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
3380 
3381     Not Collective
3382 
3383    Input Parameters:
3384 +    A - the matrix
3385 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3386 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3387 
3388    Output Parameter:
3389 .    A_loc - the local sequential matrix generated
3390 
3391     Level: developer
3392 
3393 @*/
3394 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3395 {
3396   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3397   PetscErrorCode    ierr;
3398   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3399   IS                isrowa,iscola;
3400   Mat               *aloc;
3401 
3402   PetscFunctionBegin;
3403   if (!logkey_getlocalmatcondensed) {
3404     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3405   }
3406   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3407   if (!row){
3408     start = a->rstart; end = a->rend;
3409     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3410   } else {
3411     isrowa = *row;
3412   }
3413   if (!col){
3414     start = a->cstart;
3415     cmap  = a->garray;
3416     nzA   = a->A->n;
3417     nzB   = a->B->n;
3418     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3419     ncols = 0;
3420     for (i=0; i<nzB; i++) {
3421       if (cmap[i] < start) idx[ncols++] = cmap[i];
3422       else break;
3423     }
3424     imark = i;
3425     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3426     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3427     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3428     ierr = PetscFree(idx);CHKERRQ(ierr);
3429   } else {
3430     iscola = *col;
3431   }
3432   if (scall != MAT_INITIAL_MATRIX){
3433     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3434     aloc[0] = *A_loc;
3435   }
3436   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3437   *A_loc = aloc[0];
3438   ierr = PetscFree(aloc);CHKERRQ(ierr);
3439   if (!row){
3440     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3441   }
3442   if (!col){
3443     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3444   }
3445   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3446   PetscFunctionReturn(0);
3447 }
3448 
3449 static PetscEvent logkey_GetBrowsOfAcols = 0;
3450 #undef __FUNCT__
3451 #define __FUNCT__ "MatGetBrowsOfAcols"
3452 /*@C
3453     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
3454 
3455     Collective on Mat
3456 
3457    Input Parameters:
3458 +    A,B - the matrices in mpiaij format
3459 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3460 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3461 
3462    Output Parameter:
3463 +    rowb, colb - index sets of rows and columns of B to extract
3464 .    brstart - row index of B_seq from which next B->m rows are taken from B's local rows
3465 -    B_seq - the sequential matrix generated
3466 
3467     Level: developer
3468 
3469 @*/
3470 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3471 {
3472   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3473   PetscErrorCode    ierr;
3474   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3475   IS                isrowb,iscolb;
3476   Mat               *bseq;
3477 
3478   PetscFunctionBegin;
3479   if (a->cstart != b->rstart || a->cend != b->rend){
3480     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3481   }
3482   if (!logkey_GetBrowsOfAcols) {
3483     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3484   }
3485   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3486 
3487   if (scall == MAT_INITIAL_MATRIX){
3488     start = a->cstart;
3489     cmap  = a->garray;
3490     nzA   = a->A->n;
3491     nzB   = a->B->n;
3492     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3493     ncols = 0;
3494     for (i=0; i<nzB; i++) {  /* row < local row index */
3495       if (cmap[i] < start) idx[ncols++] = cmap[i];
3496       else break;
3497     }
3498     imark = i;
3499     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3500     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3501     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3502     ierr = PetscFree(idx);CHKERRQ(ierr);
3503     *brstart = imark;
3504     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3505   } else {
3506     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3507     isrowb = *rowb; iscolb = *colb;
3508     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3509     bseq[0] = *B_seq;
3510   }
3511   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3512   *B_seq = bseq[0];
3513   ierr = PetscFree(bseq);CHKERRQ(ierr);
3514   if (!rowb){
3515     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3516   } else {
3517     *rowb = isrowb;
3518   }
3519   if (!colb){
3520     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3521   } else {
3522     *colb = iscolb;
3523   }
3524   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3525   PetscFunctionReturn(0);
3526 }
3527 
3528 static PetscEvent logkey_GetBrowsOfAocols = 0;
3529 #undef __FUNCT__
3530 #define __FUNCT__ "MatGetBrowsOfAoCols"
3531 /*@C
3532     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
3533     of the OFF-DIAGONAL portion of local A
3534 
3535     Collective on Mat
3536 
3537    Input Parameters:
3538 +    A,B - the matrices in mpiaij format
3539 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3540 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3541 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3542 
3543    Output Parameter:
3544 +    B_oth - the sequential matrix generated
3545 
3546     Level: developer
3547 
3548 @*/
3549 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3550 {
3551   VecScatter_MPI_General *gen_to,*gen_from;
3552   PetscErrorCode         ierr;
3553   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3554   Mat_SeqAIJ             *b_oth;
3555   VecScatter             ctx=a->Mvctx;
3556   MPI_Comm               comm=ctx->comm;
3557   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3558   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3559   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3560   PetscInt               i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj,*sstarts,*sstartsj,len;
3561   MPI_Request            *rwaits,*swaits;
3562   MPI_Status             *sstatus,rstatus;
3563   PetscInt               *cols;
3564   PetscScalar            *vals;
3565   PetscMPIInt            j;
3566 
3567   PetscFunctionBegin;
3568   if (a->cstart != b->rstart || a->cend != b->rend){
3569     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3570   }
3571   if (!logkey_GetBrowsOfAocols) {
3572     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3573   }
3574   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3575   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3576 
3577   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3578   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3579   rvalues  = gen_from->values; /* holds the length of sending row */
3580   svalues  = gen_to->values;   /* holds the length of receiving row */
3581   nrecvs   = gen_from->n;
3582   nsends   = gen_to->n;
3583   rwaits   = gen_from->requests;
3584   swaits   = gen_to->requests;
3585   rrow     = gen_from->indices; /* local row index to be received */
3586   srow     = gen_to->indices;   /* local row index to be sent */
3587   rstarts  = gen_from->starts;
3588   sstarts  = gen_to->starts;
3589   rprocs   = gen_from->procs;
3590   sprocs   = gen_to->procs;
3591   sstatus  = gen_to->sstatus;
3592 
3593   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3594   if (scall == MAT_INITIAL_MATRIX){
3595     /* i-array */
3596     /*---------*/
3597     /*  post receives */
3598     for (i=0; i<nrecvs; i++){
3599       rowlen = (PetscInt*)rvalues + rstarts[i];
3600       nrows = rstarts[i+1]-rstarts[i];
3601       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3602     }
3603 
3604     /* pack the outgoing message */
3605     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3606     rstartsj = sstartsj + nsends +1;
3607     sstartsj[0] = 0;  rstartsj[0] = 0;
3608     len = 0; /* total length of j or a array to be sent */
3609     k = 0;
3610     for (i=0; i<nsends; i++){
3611       rowlen = (PetscInt*)svalues + sstarts[i];
3612       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3613       for (j=0; j<nrows; j++) {
3614         row = srow[k] + b->rowners[rank]; /* global row idx */
3615         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3616         len += rowlen[j];
3617         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3618         k++;
3619       }
3620       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3621        sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3622     }
3623     /* recvs and sends of i-array are completed */
3624     i = nrecvs;
3625     while (i--) {
3626       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3627     }
3628     if (nsends) {
3629       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3630     }
3631     /* allocate buffers for sending j and a arrays */
3632     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3633     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3634 
3635     /* create i-array of B_oth */
3636     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3637     b_othi[0] = 0;
3638     len = 0; /* total length of j or a array to be received */
3639     k = 0;
3640     for (i=0; i<nrecvs; i++){
3641       rowlen = (PetscInt*)rvalues + rstarts[i];
3642       nrows = rstarts[i+1]-rstarts[i];
3643       for (j=0; j<nrows; j++) {
3644         b_othi[k+1] = b_othi[k] + rowlen[j];
3645         len += rowlen[j]; k++;
3646       }
3647       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3648     }
3649 
3650     /* allocate space for j and a arrrays of B_oth */
3651     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3652     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3653 
3654     /* j-array */
3655     /*---------*/
3656     /*  post receives of j-array */
3657     for (i=0; i<nrecvs; i++){
3658       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3659       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3660     }
3661     k = 0;
3662     for (i=0; i<nsends; i++){
3663       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3664       bufJ = bufj+sstartsj[i];
3665       for (j=0; j<nrows; j++) {
3666         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3667         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3668         for (l=0; l<ncols; l++){
3669           *bufJ++ = cols[l];
3670         }
3671         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3672       }
3673       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3674     }
3675 
3676     /* recvs and sends of j-array are completed */
3677     i = nrecvs;
3678     while (i--) {
3679       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3680     }
3681     if (nsends) {
3682       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3683     }
3684   } else if (scall == MAT_REUSE_MATRIX){
3685     sstartsj = *startsj;
3686     rstartsj = sstartsj + nsends +1;
3687     bufa     = *bufa_ptr;
3688     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3689     b_otha   = b_oth->a;
3690   } else {
3691     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
3692   }
3693 
3694   /* a-array */
3695   /*---------*/
3696   /*  post receives of a-array */
3697   for (i=0; i<nrecvs; i++){
3698     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3699     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3700   }
3701   k = 0;
3702   for (i=0; i<nsends; i++){
3703     nrows = sstarts[i+1]-sstarts[i];
3704     bufA = bufa+sstartsj[i];
3705     for (j=0; j<nrows; j++) {
3706       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3707       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3708       for (l=0; l<ncols; l++){
3709         *bufA++ = vals[l];
3710       }
3711       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3712 
3713     }
3714     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3715   }
3716   /* recvs and sends of a-array are completed */
3717   i = nrecvs;
3718   while (i--) {
3719     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3720   }
3721    if (nsends) {
3722     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3723   }
3724 
3725   if (scall == MAT_INITIAL_MATRIX){
3726     /* put together the new matrix */
3727     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3728 
3729     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3730     /* Since these are PETSc arrays, change flags to free them as necessary. */
3731     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3732     b_oth->freedata = PETSC_TRUE;
3733     b_oth->nonew    = 0;
3734 
3735     ierr = PetscFree(bufj);CHKERRQ(ierr);
3736     if (!startsj || !bufa_ptr){
3737       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3738       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3739     } else {
3740       *startsj  = sstartsj;
3741       *bufa_ptr = bufa;
3742     }
3743   }
3744   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3745 
3746   PetscFunctionReturn(0);
3747 }
3748 
3749 /*MC
3750    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3751 
3752    Options Database Keys:
3753 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3754 
3755   Level: beginner
3756 
3757 .seealso: MatCreateMPIAIJ
3758 M*/
3759 
3760 EXTERN_C_BEGIN
3761 #undef __FUNCT__
3762 #define __FUNCT__ "MatCreate_MPIAIJ"
3763 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3764 {
3765   Mat_MPIAIJ     *b;
3766   PetscErrorCode ierr;
3767   PetscInt       i;
3768   PetscMPIInt    size;
3769 
3770   PetscFunctionBegin;
3771   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3772 
3773   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3774   B->data         = (void*)b;
3775   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3776   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3777   B->factor       = 0;
3778   B->bs           = 1;
3779   B->assembled    = PETSC_FALSE;
3780   B->mapping      = 0;
3781 
3782   B->insertmode      = NOT_SET_VALUES;
3783   b->size            = size;
3784   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3785 
3786   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3787   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3788 
3789   /* the information in the maps duplicates the information computed below, eventually
3790      we should remove the duplicate information that is not contained in the maps */
3791   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3792   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3793 
3794   /* build local table of row and column ownerships */
3795   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3796   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3797   b->cowners = b->rowners + b->size + 2;
3798   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3799   b->rowners[0] = 0;
3800   for (i=2; i<=b->size; i++) {
3801     b->rowners[i] += b->rowners[i-1];
3802   }
3803   b->rstart = b->rowners[b->rank];
3804   b->rend   = b->rowners[b->rank+1];
3805   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3806   b->cowners[0] = 0;
3807   for (i=2; i<=b->size; i++) {
3808     b->cowners[i] += b->cowners[i-1];
3809   }
3810   b->cstart = b->cowners[b->rank];
3811   b->cend   = b->cowners[b->rank+1];
3812 
3813   /* build cache for off array entries formed */
3814   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3815   b->donotstash  = PETSC_FALSE;
3816   b->colmap      = 0;
3817   b->garray      = 0;
3818   b->roworiented = PETSC_TRUE;
3819 
3820   /* stuff used for matrix vector multiply */
3821   b->lvec      = PETSC_NULL;
3822   b->Mvctx     = PETSC_NULL;
3823 
3824   /* stuff for MatGetRow() */
3825   b->rowindices   = 0;
3826   b->rowvalues    = 0;
3827   b->getrowactive = PETSC_FALSE;
3828 
3829   /* Explicitly create 2 MATSEQAIJ matrices. */
3830   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3831   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3832   PetscLogObjectParent(B,b->A);
3833   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3834   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3835   PetscLogObjectParent(B,b->B);
3836 
3837   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3838                                      "MatStoreValues_MPIAIJ",
3839                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3840   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3841                                      "MatRetrieveValues_MPIAIJ",
3842                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3843   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3844 				     "MatGetDiagonalBlock_MPIAIJ",
3845                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3846   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3847 				     "MatIsTranspose_MPIAIJ",
3848 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3849   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3850 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3851 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3852   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3853 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3854 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3855   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3856 				     "MatDiagonalScaleLocal_MPIAIJ",
3857 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3858   PetscFunctionReturn(0);
3859 }
3860 EXTERN_C_END
3861