xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a5ff213d897f5fcaef42ae26f8ae0a8e003d03e2)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
9 at a slightly higher hash-table lookup cost; without it, it is not scalable
10 (each processor holds an integer array of length N) but access is fast.
11 */
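/*
   Illustrative example (the column numbers are assumed, not taken from this file):
   if this process references global columns 3, 7 and 12 in its off-diagonal block,
   then garray = {3,7,12} and the loops below produce
     with PETSC_USE_CTABLE:    table entries 4 -> 1, 8 -> 2, 13 -> 3  (keys and values stored 1-based)
     without PETSC_USE_CTABLE: colmap[3] = 1, colmap[7] = 2, colmap[12] = 3, all other entries 0
   so a later lookup returns the local column number plus one, with 0 meaning
   "this global column is not present on this process".
*/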
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
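/*
   The two macros below are hand-inlined variants of MatSetValues_SeqAIJ() for the
   diagonal (A) and off-diagonal (B) blocks.  In outline, each macro
     - narrows the search with a short binary search and then scans the row linearly,
     - adds to or overwrites the value if the column is already present,
     - otherwise shifts the later entries of the row up and inserts the new
       (column,value) pair, enlarging the row storage by CHUNKSIZE entries when full.
   They rely on the local variables declared in MatSetValues_MPIAIJ() below
   (rp, ap, rmax, nrow, low, high, ...), so they cannot be used on their own.
*/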
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                    ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
58       if (nonew == 1) goto a_noinsert; \
59       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
60       if (nrow >= rmax) { \
61         /* there is no extra room in row, therefore enlarge */ \
62         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
63         PetscScalar *new_a; \
64  \
65         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
66  \
67         /* malloc new storage space */ \
68         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
69         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
70         new_j   = (PetscInt*)(new_a + new_nz); \
71         new_i   = new_j + new_nz; \
72  \
73         /* copy over old data into new slots */ \
74         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
75         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
76         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
77         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
78         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
79                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
81         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
82                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
83         /* free up old matrix storage */ \
84  \
85         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
86         if (!a->singlemalloc) { \
87            ierr = PetscFree(a->i);CHKERRQ(ierr); \
88            ierr = PetscFree(a->j);CHKERRQ(ierr); \
89         } \
90         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
91         a->singlemalloc = PETSC_TRUE; \
92  \
93         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
94         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
95         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
96         a->maxnz += CHUNKSIZE; \
97         a->reallocs++; \
98       } \
99       N = nrow++ - 1; a->nz++; \
100       /* shift up all the later entries in this row */ \
101       for (ii=N; ii>=_i; ii--) { \
102         rp[ii+1] = rp[ii]; \
103         ap[ii+1] = ap[ii]; \
104       } \
105       rp[_i] = col1;  \
106       ap[_i] = value;  \
107       a_noinsert: ; \
108       ailen[row] = nrow; \
109 }
110 
111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112 { \
113  \
114     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115     rmax = bimax[row]; nrow = bilen[row];  \
116     col1 = col - shift; \
117      \
118     low = 0; high = nrow; \
119     while (high-low > 5) { \
120       t = (low+high)/2; \
121       if (rp[t] > col) high = t; \
122       else             low  = t; \
123     } \
124        for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > col1) break; \
126         if (rp[_i] == col1) { \
127           if (addv == ADD_VALUES) ap[_i] += value;   \
128           else                    ap[_i] = value; \
129           goto b_noinsert; \
130         } \
131       }  \
132       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
133       if (nonew == 1) goto b_noinsert; \
134       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
135       if (nrow >= rmax) { \
136         /* there is no extra room in row, therefore enlarge */ \
137         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
138         PetscScalar *new_a; \
139  \
140         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
141  \
142         /* malloc new storage space */ \
143         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
144         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
145         new_j   = (PetscInt*)(new_a + new_nz); \
146         new_i   = new_j + new_nz; \
147  \
148         /* copy over old data into new slots */ \
149         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
150         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
151         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
152         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
153         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
154                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
155         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
156         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
157                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
158         /* free up old matrix storage */ \
159  \
160         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
161         if (!b->singlemalloc) { \
162           ierr = PetscFree(b->i);CHKERRQ(ierr); \
163           ierr = PetscFree(b->j);CHKERRQ(ierr); \
164         } \
165         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
166         b->singlemalloc = PETSC_TRUE; \
167  \
168         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
169         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
170         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
171         b->maxnz += CHUNKSIZE; \
172         b->reallocs++; \
173       } \
174       N = nrow++ - 1; b->nz++; \
175       /* shift up all the later entries in this row */ \
176       for (ii=N; ii>=_i; ii--) { \
177         rp[ii+1] = rp[ii]; \
178         ap[ii+1] = ap[ii]; \
179       } \
180       rp[_i] = col1;  \
181       ap[_i] = value;  \
182       b_noinsert: ; \
183       bilen[row] = nrow; \
184 }
185 
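/*
   MatSetValues_MPIAIJ() routes each (row,col) entry to one of three places:
     - locally owned rows with columns in [cstart,cend) go into the diagonal block aij->A,
     - locally owned rows with other columns go into the off-diagonal block aij->B,
       translating the global column through colmap/garray if the matrix was
       assembled before,
     - rows owned by other processes are stashed and communicated during
       MatAssemblyBegin/End (unless MAT_IGNORE_OFF_PROC_ENTRIES was set).

   Caller-side usage sketch (illustrative only; the indices and value are assumptions):

     PetscInt    i = 0, j = 0;
     PetscScalar v = 1.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/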
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIAIJ"
188 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
191   PetscScalar    value;
192   PetscErrorCode ierr;
193   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
194   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
195   PetscTruth     roworiented = aij->roworiented;
196 
197   /* Some variables required by the macros above */
198   Mat            A = aij->A;
199   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
200   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
201   PetscScalar    *aa = a->a;
202   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
203   Mat            B = aij->B;
204   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
205   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
206   PetscScalar    *ba = b->a;
207 
208   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
209   PetscInt       nonew = a->nonew,shift=0;
210   PetscScalar    *ap;
211 
212   PetscFunctionBegin;
213   for (i=0; i<m; i++) {
214     if (im[i] < 0) continue;
215 #if defined(PETSC_USE_DEBUG)
216     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
217 #endif
218     if (im[i] >= rstart && im[i] < rend) {
219       row = im[i] - rstart;
220       for (j=0; j<n; j++) {
221         if (in[j] >= cstart && in[j] < cend){
222           col = in[j] - cstart;
223           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
224           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
225           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
226           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
227         } else if (in[j] < 0) continue;
228 #if defined(PETSC_USE_DEBUG)
229         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
230 #endif
231         else {
232           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
233           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
234           if (mat->was_assembled) {
235             if (!aij->colmap) {
236               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined (PETSC_USE_CTABLE)
239             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
240 	    col--;
241 #else
242             col = aij->colmap[in[j]] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
245               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
246               col =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
248               B = aij->B;
249               b = (Mat_SeqAIJ*)B->data;
250               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
251               ba = b->a;
252             }
253           } else col = in[j];
254           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (!aij->donotstash) {
260         if (roworiented) {
261           if (ignorezeroentries && v[i*n] == 0.0) continue;
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
263         } else {
264           if (ignorezeroentries && v[i] == 0.0) continue;
265           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
266         }
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatGetValues_MPIAIJ"
275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
276 {
277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
278   PetscErrorCode ierr;
279   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
280   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
281 
282   PetscFunctionBegin;
283   for (i=0; i<m; i++) {
284     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
285     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
286     if (idxm[i] >= rstart && idxm[i] < rend) {
287       row = idxm[i] - rstart;
288       for (j=0; j<n; j++) {
289         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
290         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
291         if (idxn[j] >= cstart && idxn[j] < cend){
292           col = idxn[j] - cstart;
293           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
294         } else {
295           if (!aij->colmap) {
296             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
297           }
298 #if defined (PETSC_USE_CTABLE)
299           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
300           col --;
301 #else
302           col = aij->colmap[idxn[j]] - 1;
303 #endif
304           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
305           else {
306             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
307           }
308         }
309       }
310     } else {
311       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
312     }
313   }
314   PetscFunctionReturn(0);
315 }
316 
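/*
   Assembly is a two-phase operation: MatAssemblyBegin_MPIAIJ() checks that all
   processes agree on the InsertMode and starts scattering the stashed
   off-process entries to their owning processes; MatAssemblyEnd_MPIAIJ()
   receives those entries, inserts them through MatSetValues_MPIAIJ(), and then
   assembles the sequential blocks aij->A and aij->B.
*/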
317 #undef __FUNCT__
318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
320 {
321   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
322   PetscErrorCode ierr;
323   PetscInt       nstash,reallocs;
324   InsertMode     addv;
325 
326   PetscFunctionBegin;
327   if (aij->donotstash) {
328     PetscFunctionReturn(0);
329   }
330 
331   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
332   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
333   if (addv == (ADD_VALUES|INSERT_VALUES)) {
334     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
335   }
336   mat->insertmode = addv; /* in case this processor had no cache */
337 
338   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
339   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
340   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNCT__
345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
347 {
348   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
349   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
350   PetscErrorCode ierr;
351   PetscMPIInt    n;
352   PetscInt       i,j,rstart,ncols,flg;
353   PetscInt       *row,*col,other_disassembled;
354   PetscScalar    *val;
355   InsertMode     addv = mat->insertmode;
356 
357   PetscFunctionBegin;
358   if (!aij->donotstash) {
359     while (1) {
360       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
361       if (!flg) break;
362 
363       for (i=0; i<n;) {
364         /* Now identify the consecutive vals belonging to the same row */
365         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
366         if (j < n) ncols = j-i;
367         else       ncols = n-i;
368         /* Now assemble all these values with a single function call */
369         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
370         i = j;
371       }
372     }
373     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
374   }
375   a->compressedrow.use     = PETSC_FALSE;
376   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
377   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
378 
379   /* determine if any processor has disassembled; if so we must
380      also disassemble ourselves, in order that we may reassemble. */
381   /*
382      if the nonzero structure of submatrix B cannot change then we know that
383      no processor disassembled, thus we can skip this step
384   */
385   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
386     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
387     if (mat->was_assembled && !other_disassembled) {
388       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
389       /* reaccess the b because aij->B was changed */
390       b    = (Mat_SeqAIJ *)aij->B->data;
391     }
392   }
393 
394   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
395     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
396   }
397 
398   b->inode.use         = PETSC_FALSE;
399   b->compressedrow.use = PETSC_TRUE;
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 
408   /* used by MatAXPY() */
409   a->xtoy = 0; b->xtoy = 0;
410   a->XtoY = 0; b->XtoY = 0;
411 
412   PetscFunctionReturn(0);
413 }
414 
415 #undef __FUNCT__
416 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
417 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
418 {
419   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
420   PetscErrorCode ierr;
421 
422   PetscFunctionBegin;
423   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
424   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
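/*
   MatZeroRows_MPIAIJ() first determines, for every row index in the IS, which
   process owns it, ships each index to its owner with a small Isend/Irecv
   exchange, and collects the locally owned rows into lrows.  Those rows are
   then zeroed in both local blocks; a requested diagonal value is either passed
   straight to the square diagonal block or, for the rectangular case, set
   afterwards with MatSetValues() followed by a final assembly.
*/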
428 #undef __FUNCT__
429 #define __FUNCT__ "MatZeroRows_MPIAIJ"
430 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
431 {
432   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
433   PetscErrorCode ierr;
434   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
435   PetscInt       i,N,*rows,*owners = l->rowners;
436   PetscInt       *nprocs,j,idx,nsends,row;
437   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
438   PetscInt       *rvalues,count,base,slen,*source;
439   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
440   MPI_Comm       comm = A->comm;
441   MPI_Request    *send_waits,*recv_waits;
442   MPI_Status     recv_status,*send_status;
443   IS             istmp;
444   PetscTruth     found;
445 
446   PetscFunctionBegin;
447   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
448   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
449 
450   /*  first count number of contributors to each processor */
451   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
452   ierr   = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
453   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
454   for (i=0; i<N; i++) {
455     idx = rows[i];
456     found = PETSC_FALSE;
457     for (j=0; j<size; j++) {
458       if (idx >= owners[j] && idx < owners[j+1]) {
459         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
460       }
461     }
462     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
463   }
464   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
465 
466   /* inform other processors of number of messages and max length*/
467   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
468 
469   /* post receives:   */
470   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
471   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
472   for (i=0; i<nrecvs; i++) {
473     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
474   }
475 
476   /* do sends:
477       1) starts[i] gives the starting index in svalues for stuff going to
478          the ith processor
479   */
480   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
481   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
482   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
483   starts[0] = 0;
484   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
485   for (i=0; i<N; i++) {
486     svalues[starts[owner[i]]++] = rows[i];
487   }
488   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
489 
490   starts[0] = 0;
491   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
492   count = 0;
493   for (i=0; i<size; i++) {
494     if (nprocs[2*i+1]) {
495       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
496     }
497   }
498   ierr = PetscFree(starts);CHKERRQ(ierr);
499 
500   base = owners[rank];
501 
502   /*  wait on receives */
503   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
504   source = lens + nrecvs;
505   count  = nrecvs; slen = 0;
506   while (count) {
507     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
508     /* unpack receives into our local space */
509     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
510     source[imdex]  = recv_status.MPI_SOURCE;
511     lens[imdex]    = n;
512     slen          += n;
513     count--;
514   }
515   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
516 
517   /* move the received row indices into the local list of rows to zero */
518   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
519   count = 0;
520   for (i=0; i<nrecvs; i++) {
521     values = rvalues + i*nmax;
522     for (j=0; j<lens[i]; j++) {
523       lrows[count++] = values[j] - base;
524     }
525   }
526   ierr = PetscFree(rvalues);CHKERRQ(ierr);
527   ierr = PetscFree(lens);CHKERRQ(ierr);
528   ierr = PetscFree(owner);CHKERRQ(ierr);
529   ierr = PetscFree(nprocs);CHKERRQ(ierr);
530 
531   /* actually zap the local rows */
532   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
533   PetscLogObjectParent(A,istmp);
534 
535   /*
536         Zero the required rows. If the "diagonal block" of the matrix
537      is square and the user wishes to set the diagonal we use separate
538      code so that MatSetValues() is not called for each diagonal entry,
539      which would allocate new memory, causing many mallocs and slowing things down.
540 
541        Contributed by: Mathew Knepley
542   */
543   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
544   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
545   if (diag && (l->A->M == l->A->N)) {
546     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
547   } else if (diag) {
548     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
549     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
550       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
551 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
552     }
553     for (i = 0; i < slen; i++) {
554       row  = lrows[i] + rstart;
555       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
556     }
557     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
559   } else {
560     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
561   }
562   ierr = ISDestroy(istmp);CHKERRQ(ierr);
563   ierr = PetscFree(lrows);CHKERRQ(ierr);
564 
565   /* wait on sends */
566   if (nsends) {
567     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
568     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
569     ierr = PetscFree(send_status);CHKERRQ(ierr);
570   }
571   ierr = PetscFree(send_waits);CHKERRQ(ierr);
572   ierr = PetscFree(svalues);CHKERRQ(ierr);
573 
574   PetscFunctionReturn(0);
575 }
576 
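/*
   The multiply routines below overlap communication with computation: the
   scatter that gathers the needed off-process entries of xx into a->lvec is
   started, the diagonal block is applied to the local part of xx, the scatter
   is completed, and the off-diagonal block times the ghost values is added in,
   i.e. roughly  y = A_diag*x_local + B_off*x_ghost.
*/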
577 #undef __FUNCT__
578 #define __FUNCT__ "MatMult_MPIAIJ"
579 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
580 {
581   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
582   PetscErrorCode ierr;
583   PetscInt       nt;
584 
585   PetscFunctionBegin;
586   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
587   if (nt != A->n) {
588     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
589   }
590   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
591   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
592   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatMultAdd_MPIAIJ"
599 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600 {
601   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
602   PetscErrorCode ierr;
603 
604   PetscFunctionBegin;
605   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
606   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
607   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
608   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
609   PetscFunctionReturn(0);
610 }
611 
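/*
   The transpose multiplies reverse the communication pattern: applying B^T to
   the local part of xx produces contributions destined for other processes,
   which are accumulated into the result with an ADD_VALUES reverse scatter,
   while A^T times xx supplies the local part.  The two branches below differ
   only in whether the scatter context merges its begin and end phases.
*/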
612 #undef __FUNCT__
613 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
614 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
615 {
616   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
617   PetscErrorCode ierr;
618   PetscTruth     merged;
619 
620   PetscFunctionBegin;
621   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
622   /* do nondiagonal part */
623   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
624   if (!merged) {
625     /* send it on its way */
626     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
627     /* do local part */
628     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
629     /* receive remote parts: note this assumes the values are not actually
630        added into yy until the next line */
631     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
632   } else {
633     /* do local part */
634     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
635     /* send it on its way */
636     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
637     /* values actually were received in the Begin() but we need to call this nop */
638     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
639   }
640   PetscFunctionReturn(0);
641 }
642 
643 EXTERN_C_BEGIN
644 #undef __FUNCT__
645 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
646 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f)
647 {
648   MPI_Comm       comm;
649   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
650   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
651   IS             Me,Notme;
652   PetscErrorCode ierr;
653   PetscInt       M,N,first,last,*notme,i;
654   PetscMPIInt    size;
655 
656   PetscFunctionBegin;
657 
658   /* Easy test: symmetric diagonal block */
659   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
660   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
661   if (!*f) PetscFunctionReturn(0);
662   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
663   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
664   if (size == 1) PetscFunctionReturn(0);
665 
666   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
667   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
668   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
669   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
670   for (i=0; i<first; i++) notme[i] = i;
671   for (i=last; i<M; i++) notme[i-last+first] = i;
672   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
673   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
674   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
675   Aoff = Aoffs[0];
676   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
677   Boff = Boffs[0];
678   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
679   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
680   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
681   ierr = ISDestroy(Me);CHKERRQ(ierr);
682   ierr = ISDestroy(Notme);CHKERRQ(ierr);
683 
684   PetscFunctionReturn(0);
685 }
686 EXTERN_C_END
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
690 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
691 {
692   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
693   PetscErrorCode ierr;
694 
695   PetscFunctionBegin;
696   /* do nondiagonal part */
697   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
698   /* send it on its way */
699   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
700   /* do local part */
701   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
702   /* receive remote parts */
703   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 /*
708   This only works correctly for square matrices where the subblock A->A is the
709    diagonal block
710 */
711 #undef __FUNCT__
712 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
713 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
714 {
715   PetscErrorCode ierr;
716   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
717 
718   PetscFunctionBegin;
719   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
720   if (a->rstart != a->cstart || a->rend != a->cend) {
721     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
722   }
723   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatScale_MPIAIJ"
729 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
730 {
731   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
732   PetscErrorCode ierr;
733 
734   PetscFunctionBegin;
735   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
736   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatDestroy_MPIAIJ"
742 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
743 {
744   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
745   PetscErrorCode ierr;
746 
747   PetscFunctionBegin;
748 #if defined(PETSC_USE_LOG)
749   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
750 #endif
751   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
752   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
753   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
754   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
755 #if defined (PETSC_USE_CTABLE)
756   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
757 #else
758   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
759 #endif
760   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
761   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
762   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
763   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
764   ierr = PetscFree(aij);CHKERRQ(ierr);
765 
766   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
767   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
768   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
769   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
770   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
771   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
772   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
773   PetscFunctionReturn(0);
774 }
775 
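/*
   The binary viewer writes the matrix in the usual PETSc binary format: a header
   (MAT_FILE_COOKIE, M, N, global nonzero count), then all row lengths, then all
   global column indices, then all values.  Each piece is gathered row-block by
   row-block onto process 0, which is the only process that writes to the file.
*/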
776 #undef __FUNCT__
777 #define __FUNCT__ "MatView_MPIAIJ_Binary"
778 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
779 {
780   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
781   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
782   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
783   PetscErrorCode    ierr;
784   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
785   int               fd;
786   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
787   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
788   PetscScalar       *column_values;
789 
790   PetscFunctionBegin;
791   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
792   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
793   nz   = A->nz + B->nz;
794   if (!rank) {
795     header[0] = MAT_FILE_COOKIE;
796     header[1] = mat->M;
797     header[2] = mat->N;
798     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
799     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
800     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
801     /* get largest number of rows any processor has */
802     rlen = mat->m;
803     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
804     for (i=1; i<size; i++) {
805       rlen = PetscMax(rlen,range[i+1] - range[i]);
806     }
807   } else {
808     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
809     rlen = mat->m;
810   }
811 
812   /* load up the local row counts */
813   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
814   for (i=0; i<mat->m; i++) {
815     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
816   }
817 
818   /* store the row lengths to the file */
819   if (!rank) {
820     MPI_Status status;
821     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
822     for (i=1; i<size; i++) {
823       rlen = range[i+1] - range[i];
824       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
825       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
826     }
827   } else {
828     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
829   }
830   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
831 
832   /* load up the local column indices */
833   nzmax = nz; /* )th processor needs space a largest processor needs */
834   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
835   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
836   cnt  = 0;
837   for (i=0; i<mat->m; i++) {
838     for (j=B->i[i]; j<B->i[i+1]; j++) {
839       if ( (col = garray[B->j[j]]) > cstart) break;
840       column_indices[cnt++] = col;
841     }
842     for (k=A->i[i]; k<A->i[i+1]; k++) {
843       column_indices[cnt++] = A->j[k] + cstart;
844     }
845     for (; j<B->i[i+1]; j++) {
846       column_indices[cnt++] = garray[B->j[j]];
847     }
848   }
849   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
850 
851   /* store the column indices to the file */
852   if (!rank) {
853     MPI_Status status;
854     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
855     for (i=1; i<size; i++) {
856       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
857       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
858       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
859       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
860     }
861   } else {
862     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
863     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(column_indices);CHKERRQ(ierr);
866 
867   /* load up the local column values */
868   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
869   cnt  = 0;
870   for (i=0; i<mat->m; i++) {
871     for (j=B->i[i]; j<B->i[i+1]; j++) {
872       if ( garray[B->j[j]] > cstart) break;
873       column_values[cnt++] = B->a[j];
874     }
875     for (k=A->i[i]; k<A->i[i+1]; k++) {
876       column_values[cnt++] = A->a[k];
877     }
878     for (; j<B->i[i+1]; j++) {
879       column_values[cnt++] = B->a[j];
880     }
881   }
882   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
883 
884   /* store the column values to the file */
885   if (!rank) {
886     MPI_Status status;
887     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
888     for (i=1; i<size; i++) {
889       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
890       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
891       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
892       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
893     }
894   } else {
895     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
896     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
897   }
898   ierr = PetscFree(column_values);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
904 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
905 {
906   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
907   PetscErrorCode    ierr;
908   PetscMPIInt       rank = aij->rank,size = aij->size;
909   PetscTruth        isdraw,iascii,isbinary;
910   PetscViewer       sviewer;
911   PetscViewerFormat format;
912 
913   PetscFunctionBegin;
914   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
915   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
916   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
917   if (iascii) {
918     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
919     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
920       MatInfo    info;
921       Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
922       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
923       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
924       if (!a->inode.size) {
925         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
926 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
927       } else {
928         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
929 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
930       }
931       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
932       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
933       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
934       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
935       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
936       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
937       PetscFunctionReturn(0);
938     } else if (format == PETSC_VIEWER_ASCII_INFO) {
939       Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
940       if (a->inode.size) {
941         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",a->inode.node_count,a->inode.limit);CHKERRQ(ierr);
942       } else {
943         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
944       }
945       PetscFunctionReturn(0);
946     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
947       PetscFunctionReturn(0);
948     }
949   } else if (isbinary) {
950     if (size == 1) {
951       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
952       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
953     } else {
954       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
955     }
956     PetscFunctionReturn(0);
957   } else if (isdraw) {
958     PetscDraw  draw;
959     PetscTruth isnull;
960     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
961     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
962   }
963 
964   if (size == 1) {
965     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
966     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
967   } else {
968     /* assemble the entire matrix onto first processor. */
969     Mat         A;
970     Mat_SeqAIJ  *Aloc;
971     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
972     PetscScalar *a;
973 
974     if (!rank) {
975       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
976     } else {
977       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
978     }
979     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
980     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
981     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
982     PetscLogObjectParent(mat,A);
983 
984     /* copy over the A part */
985     Aloc = (Mat_SeqAIJ*)aij->A->data;
986     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
987     row = aij->rstart;
988     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
989     for (i=0; i<m; i++) {
990       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
991       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
992     }
993     aj = Aloc->j;
994     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
995 
996     /* copy over the B part */
997     Aloc = (Mat_SeqAIJ*)aij->B->data;
998     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
999     row  = aij->rstart;
1000     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1001     ct   = cols;
1002     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1003     for (i=0; i<m; i++) {
1004       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1005       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1006     }
1007     ierr = PetscFree(ct);CHKERRQ(ierr);
1008     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1009     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1010     /*
1011        Everyone has to call to draw the matrix since the graphics waits are
1012        synchronized across all processors that share the PetscDraw object
1013     */
1014     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1015     if (!rank) {
1016       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1017       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1018     }
1019     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1020     ierr = MatDestroy(A);CHKERRQ(ierr);
1021   }
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatView_MPIAIJ"
1027 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1028 {
1029   PetscErrorCode ierr;
1030   PetscTruth     iascii,isdraw,issocket,isbinary;
1031 
1032   PetscFunctionBegin;
1033   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1034   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1035   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1036   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1037   if (iascii || isdraw || isbinary || issocket) {
1038     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1039   } else {
1040     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1041   }
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 
1046 
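/*
   The "local" SOR variants below relax only with the diagonal block: each outer
   iteration gathers the current ghost values of xx into mat->lvec, forms
   bb1 = bb - B*x_ghost (by scaling lvec by -1 and calling the off-diagonal
   MatMultAdd), and then runs the requested sequential sweep on the diagonal
   block with that corrected right-hand side.  True parallel SOR is not supported.
*/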
1047 #undef __FUNCT__
1048 #define __FUNCT__ "MatRelax_MPIAIJ"
1049 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1050 {
1051   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1052   PetscErrorCode ierr;
1053   Vec            bb1;
1054   PetscScalar    mone=-1.0;
1055 
1056   PetscFunctionBegin;
1057   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1058 
1059   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1060 
1061   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1062     if (flag & SOR_ZERO_INITIAL_GUESS) {
1063       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1064       its--;
1065     }
1066 
1067     while (its--) {
1068       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1080     if (flag & SOR_ZERO_INITIAL_GUESS) {
1081       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1082       its--;
1083     }
1084     while (its--) {
1085       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1086       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1087 
1088       /* update rhs: bb1 = bb - B*x */
1089       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1090       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1091 
1092       /* local sweep */
1093       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1094       CHKERRQ(ierr);
1095     }
1096   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1097     if (flag & SOR_ZERO_INITIAL_GUESS) {
1098       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1099       its--;
1100     }
1101     while (its--) {
1102       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1103       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1104 
1105       /* update rhs: bb1 = bb - B*x */
1106       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1107       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1108 
1109       /* local sweep */
1110       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1111       CHKERRQ(ierr);
1112     }
1113   } else {
1114     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1115   }
1116 
1117   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1123 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1124 {
1125   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1126   Mat            A = mat->A,B = mat->B;
1127   PetscErrorCode ierr;
1128   PetscReal      isend[5],irecv[5];
1129 
1130   PetscFunctionBegin;
1131   info->block_size     = 1.0;
1132   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1133   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1134   isend[3] = info->memory;  isend[4] = info->mallocs;
1135   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1136   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1137   isend[3] += info->memory;  isend[4] += info->mallocs;
1138   if (flag == MAT_LOCAL) {
1139     info->nz_used      = isend[0];
1140     info->nz_allocated = isend[1];
1141     info->nz_unneeded  = isend[2];
1142     info->memory       = isend[3];
1143     info->mallocs      = isend[4];
1144   } else if (flag == MAT_GLOBAL_MAX) {
1145     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1146     info->nz_used      = irecv[0];
1147     info->nz_allocated = irecv[1];
1148     info->nz_unneeded  = irecv[2];
1149     info->memory       = irecv[3];
1150     info->mallocs      = irecv[4];
1151   } else if (flag == MAT_GLOBAL_SUM) {
1152     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1153     info->nz_used      = irecv[0];
1154     info->nz_allocated = irecv[1];
1155     info->nz_unneeded  = irecv[2];
1156     info->memory       = irecv[3];
1157     info->mallocs      = irecv[4];
1158   }
1159   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1160   info->fill_ratio_needed = 0;
1161   info->factor_mallocs    = 0;
1162   info->rows_global       = (double)matin->M;
1163   info->columns_global    = (double)matin->N;
1164   info->rows_local        = (double)matin->m;
1165   info->columns_local     = (double)matin->N;
1166 
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatSetOption_MPIAIJ"
1172 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1173 {
1174   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   switch (op) {
1179   case MAT_NO_NEW_NONZERO_LOCATIONS:
1180   case MAT_YES_NEW_NONZERO_LOCATIONS:
1181   case MAT_COLUMNS_UNSORTED:
1182   case MAT_COLUMNS_SORTED:
1183   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1184   case MAT_KEEP_ZEROED_ROWS:
1185   case MAT_NEW_NONZERO_LOCATION_ERR:
1186   case MAT_USE_INODES:
1187   case MAT_DO_NOT_USE_INODES:
1188   case MAT_IGNORE_ZERO_ENTRIES:
1189     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1190     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1191     break;
1192   case MAT_ROW_ORIENTED:
1193     a->roworiented = PETSC_TRUE;
1194     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1195     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1196     break;
1197   case MAT_ROWS_SORTED:
1198   case MAT_ROWS_UNSORTED:
1199   case MAT_YES_NEW_DIAGONALS:
1200     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1201     break;
1202   case MAT_COLUMN_ORIENTED:
1203     a->roworiented = PETSC_FALSE;
1204     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1205     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1206     break;
1207   case MAT_IGNORE_OFF_PROC_ENTRIES:
1208     a->donotstash = PETSC_TRUE;
1209     break;
1210   case MAT_NO_NEW_DIAGONALS:
1211     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1212   case MAT_SYMMETRIC:
1213   case MAT_STRUCTURALLY_SYMMETRIC:
1214   case MAT_HERMITIAN:
1215   case MAT_SYMMETRY_ETERNAL:
1216     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1217     break;
1218   case MAT_NOT_SYMMETRIC:
1219   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1220   case MAT_NOT_HERMITIAN:
1221   case MAT_NOT_SYMMETRY_ETERNAL:
1222     break;
1223   default:
1224     SETERRQ(PETSC_ERR_SUP,"unknown option");
1225   }
1226   PetscFunctionReturn(0);
1227 }
1228 
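/*
   MatGetRow_MPIAIJ() returns a locally owned row as one list sorted by global
   column: off-diagonal entries lying to the left of the diagonal block come
   first, then the diagonal-block entries shifted by cstart, then the remaining
   off-diagonal entries, all copied into the scratch arrays rowvalues/rowindices
   sized for the longest local row.
*/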
1229 #undef __FUNCT__
1230 #define __FUNCT__ "MatGetRow_MPIAIJ"
1231 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1232 {
1233   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1234   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1235   PetscErrorCode ierr;
1236   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1237   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1238   PetscInt       *cmap,*idx_p;
1239 
1240   PetscFunctionBegin;
1241   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1242   mat->getrowactive = PETSC_TRUE;
1243 
1244   if (!mat->rowvalues && (idx || v)) {
1245     /*
1246         allocate enough space to hold information from the longest row.
1247     */
1248     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1249     PetscInt     max = 1,tmp;
1250     for (i=0; i<matin->m; i++) {
1251       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1252       if (max < tmp) { max = tmp; }
1253     }
1254     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1255     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1256   }
1257 
1258   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1259   lrow = row - rstart;
1260 
1261   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1262   if (!v)   {pvA = 0; pvB = 0;}
1263   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1264   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1265   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1266   nztot = nzA + nzB;
1267 
1268   cmap  = mat->garray;
1269   if (v  || idx) {
1270     if (nztot) {
1271       /* Sort by increasing column numbers, assuming A and B already sorted */
1272       PetscInt imark = -1;
1273       if (v) {
1274         *v = v_p = mat->rowvalues;
1275         for (i=0; i<nzB; i++) {
1276           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1277           else break;
1278         }
1279         imark = i;
1280         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1281         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1282       }
1283       if (idx) {
1284         *idx = idx_p = mat->rowindices;
1285         if (imark > -1) {
1286           for (i=0; i<imark; i++) {
1287             idx_p[i] = cmap[cworkB[i]];
1288           }
1289         } else {
1290           for (i=0; i<nzB; i++) {
1291             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1292             else break;
1293           }
1294           imark = i;
1295         }
1296         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1297         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1298       }
1299     } else {
1300       if (idx) *idx = 0;
1301       if (v)   *v   = 0;
1302     }
1303   }
1304   *nz = nztot;
1305   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1306   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1312 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1313 {
1314   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1315 
1316   PetscFunctionBegin;
1317   if (!aij->getrowactive) {
1318     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1319   }
1320   aij->getrowactive = PETSC_FALSE;
1321   PetscFunctionReturn(0);
1322 }
1323 
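/*
   MatNorm_MPIAIJ() combines the two local blocks: the Frobenius norm is the
   square root of the globally summed squares of all stored entries, NORM_1 is
   the largest column sum (per-column absolute sums reduced with MPI_SUM), and
   NORM_INFINITY is the largest row sum (local row sums reduced with MPI_MAX).
   The two norm is not supported.
*/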
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatNorm_MPIAIJ"
1326 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1327 {
1328   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1329   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1330   PetscErrorCode ierr;
1331   PetscInt       i,j,cstart = aij->cstart;
1332   PetscReal      sum = 0.0;
1333   PetscScalar    *v;
1334 
1335   PetscFunctionBegin;
1336   if (aij->size == 1) {
1337     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1338   } else {
1339     if (type == NORM_FROBENIUS) {
1340       v = amat->a;
1341       for (i=0; i<amat->nz; i++) {
1342 #if defined(PETSC_USE_COMPLEX)
1343         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1344 #else
1345         sum += (*v)*(*v); v++;
1346 #endif
1347       }
1348       v = bmat->a;
1349       for (i=0; i<bmat->nz; i++) {
1350 #if defined(PETSC_USE_COMPLEX)
1351         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1352 #else
1353         sum += (*v)*(*v); v++;
1354 #endif
1355       }
1356       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1357       *norm = sqrt(*norm);
1358     } else if (type == NORM_1) { /* max column norm */
1359       PetscReal *tmp,*tmp2;
1360       PetscInt    *jj,*garray = aij->garray;
1361       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1362       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1363       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1364       *norm = 0.0;
1365       v = amat->a; jj = amat->j;
1366       for (j=0; j<amat->nz; j++) {
1367         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1368       }
1369       v = bmat->a; jj = bmat->j;
1370       for (j=0; j<bmat->nz; j++) {
1371         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1372       }
1373       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1374       for (j=0; j<mat->N; j++) {
1375         if (tmp2[j] > *norm) *norm = tmp2[j];
1376       }
1377       ierr = PetscFree(tmp);CHKERRQ(ierr);
1378       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1379     } else if (type == NORM_INFINITY) { /* max row norm */
1380       PetscReal ntemp = 0.0;
1381       for (j=0; j<aij->A->m; j++) {
1382         v = amat->a + amat->i[j];
1383         sum = 0.0;
1384         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1385           sum += PetscAbsScalar(*v); v++;
1386         }
1387         v = bmat->a + bmat->i[j];
1388         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1389           sum += PetscAbsScalar(*v); v++;
1390         }
1391         if (sum > ntemp) ntemp = sum;
1392       }
1393       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1394     } else {
1395       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1396     }
1397   }
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatTranspose_MPIAIJ"
1403 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1404 {
1405   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1406   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1407   PetscErrorCode ierr;
1408   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1409   Mat            B;
1410   PetscScalar    *array;
1411 
1412   PetscFunctionBegin;
1413   if (!matout && M != N) {
1414     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1415   }
1416 
1417   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1418   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1419   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1420 
1421   /* copy over the A part */
1422   Aloc = (Mat_SeqAIJ*)a->A->data;
1423   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1424   row = a->rstart;
1425   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1426   for (i=0; i<m; i++) {
1427     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1428     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1429   }
1430   aj = Aloc->j;
1431   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1432 
1433   /* copy over the B part */
1434   Aloc = (Mat_SeqAIJ*)a->B->data;
1435   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1436   row  = a->rstart;
1437   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1438   ct   = cols;
1439   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1440   for (i=0; i<m; i++) {
1441     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1442     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1443   }
1444   ierr = PetscFree(ct);CHKERRQ(ierr);
1445   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1446   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1447   if (matout) {
1448     *matout = B;
1449   } else {
1450     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1451   }
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1457 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1458 {
1459   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1460   Mat            a = aij->A,b = aij->B;
1461   PetscErrorCode ierr;
1462   PetscInt       s1,s2,s3;
1463 
1464   PetscFunctionBegin;
1465   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1466   if (rr) {
1467     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1468     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1469     /* Overlap communication with computation. */
1470     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1471   }
1472   if (ll) {
1473     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1474     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1475     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1476   }
1477   /* scale  the diagonal block */
1478   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1479 
1480   if (rr) {
1481     /* Do a scatter end and then right scale the off-diagonal block */
1482     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1483     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1484   }
1485 
1486   PetscFunctionReturn(0);
1487 }
1488 
1489 
1490 #undef __FUNCT__
1491 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1492 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1493 {
1494   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1495   PetscErrorCode ierr;
1496 
1497   PetscFunctionBegin;
1498   if (!a->rank) {
1499     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1506 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1507 {
1508   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1509   PetscErrorCode ierr;
1510 
1511   PetscFunctionBegin;
1512   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1513   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1514   PetscFunctionReturn(0);
1515 }
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1518 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1519 {
1520   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1521   PetscErrorCode ierr;
1522 
1523   PetscFunctionBegin;
1524   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatEqual_MPIAIJ"
1530 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1531 {
1532   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1533   Mat            a,b,c,d;
1534   PetscTruth     flg;
1535   PetscErrorCode ierr;
1536 
1537   PetscFunctionBegin;
1538   a = matA->A; b = matA->B;
1539   c = matB->A; d = matB->B;
1540 
1541   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1542   if (flg) {
1543     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1544   }
1545   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatCopy_MPIAIJ"
1551 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1552 {
1553   PetscErrorCode ierr;
1554   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1555   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1556 
1557   PetscFunctionBegin;
1558   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1559   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1560     /* Because of the column compression in the off-processor part of the matrix a->B,
1561        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
1562        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
1563        could be provided by first uncompressing the a->B matrices and then copying the
1564        submatrices. */
1565     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1566   } else {
1567     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1568     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1569   }
1570   PetscFunctionReturn(0);
1571 }
1572 
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1575 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1576 {
1577   PetscErrorCode ierr;
1578 
1579   PetscFunctionBegin;
1580   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #include "petscblaslapack.h"
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatAXPY_MPIAIJ"
1587 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1588 {
1589   PetscErrorCode ierr;
1590   PetscInt       i;
1591   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1592   PetscBLASInt   bnz,one=1;
1593   Mat_SeqAIJ     *x,*y;
1594 
1595   PetscFunctionBegin;
1596   if (str == SAME_NONZERO_PATTERN) {
1597     x = (Mat_SeqAIJ *)xx->A->data;
1598     y = (Mat_SeqAIJ *)yy->A->data;
1599     bnz = (PetscBLASInt)x->nz;
1600     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1601     x = (Mat_SeqAIJ *)xx->B->data;
1602     y = (Mat_SeqAIJ *)yy->B->data;
1603     bnz = (PetscBLASInt)x->nz;
1604     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1605   } else if (str == SUBSET_NONZERO_PATTERN) {
1606     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1607 
1608     x = (Mat_SeqAIJ *)xx->B->data;
1609     y = (Mat_SeqAIJ *)yy->B->data;
1610     if (y->xtoy && y->XtoY != xx->B) {
1611       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1612       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1613     }
1614     if (!y->xtoy) { /* get xtoy */
1615       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1616       y->XtoY = xx->B;
1617     }
1618     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1619   } else {
1620     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1621   }
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 /* -------------------------------------------------------------------*/
1626 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1627        MatGetRow_MPIAIJ,
1628        MatRestoreRow_MPIAIJ,
1629        MatMult_MPIAIJ,
1630 /* 4*/ MatMultAdd_MPIAIJ,
1631        MatMultTranspose_MPIAIJ,
1632        MatMultTransposeAdd_MPIAIJ,
1633        0,
1634        0,
1635        0,
1636 /*10*/ 0,
1637        0,
1638        0,
1639        MatRelax_MPIAIJ,
1640        MatTranspose_MPIAIJ,
1641 /*15*/ MatGetInfo_MPIAIJ,
1642        MatEqual_MPIAIJ,
1643        MatGetDiagonal_MPIAIJ,
1644        MatDiagonalScale_MPIAIJ,
1645        MatNorm_MPIAIJ,
1646 /*20*/ MatAssemblyBegin_MPIAIJ,
1647        MatAssemblyEnd_MPIAIJ,
1648        0,
1649        MatSetOption_MPIAIJ,
1650        MatZeroEntries_MPIAIJ,
1651 /*25*/ MatZeroRows_MPIAIJ,
1652        0,
1653        0,
1654        0,
1655        0,
1656 /*30*/ MatSetUpPreallocation_MPIAIJ,
1657        0,
1658        0,
1659        0,
1660        0,
1661 /*35*/ MatDuplicate_MPIAIJ,
1662        0,
1663        0,
1664        0,
1665        0,
1666 /*40*/ MatAXPY_MPIAIJ,
1667        MatGetSubMatrices_MPIAIJ,
1668        MatIncreaseOverlap_MPIAIJ,
1669        MatGetValues_MPIAIJ,
1670        MatCopy_MPIAIJ,
1671 /*45*/ MatPrintHelp_MPIAIJ,
1672        MatScale_MPIAIJ,
1673        0,
1674        0,
1675        0,
1676 /*50*/ MatSetBlockSize_MPIAIJ,
1677        0,
1678        0,
1679        0,
1680        0,
1681 /*55*/ MatFDColoringCreate_MPIAIJ,
1682        0,
1683        MatSetUnfactored_MPIAIJ,
1684        0,
1685        0,
1686 /*60*/ MatGetSubMatrix_MPIAIJ,
1687        MatDestroy_MPIAIJ,
1688        MatView_MPIAIJ,
1689        MatGetPetscMaps_Petsc,
1690        0,
1691 /*65*/ 0,
1692        0,
1693        0,
1694        0,
1695        0,
1696 /*70*/ 0,
1697        0,
1698        MatSetColoring_MPIAIJ,
1699 #if defined(PETSC_HAVE_ADIC)
1700        MatSetValuesAdic_MPIAIJ,
1701 #else
1702        0,
1703 #endif
1704        MatSetValuesAdifor_MPIAIJ,
1705 /*75*/ 0,
1706        0,
1707        0,
1708        0,
1709        0,
1710 /*80*/ 0,
1711        0,
1712        0,
1713        0,
1714 /*84*/ MatLoad_MPIAIJ,
1715        0,
1716        0,
1717        0,
1718        0,
1719        0,
1720 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1721        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1722        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1723        MatPtAP_MPIAIJ_MPIAIJ,
1724        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1725 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1726        0,
1727        0,
1728        0};
1729 
1730 /* ----------------------------------------------------------------------------------------*/
1731 
1732 EXTERN_C_BEGIN
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1735 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1736 {
1737   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1738   PetscErrorCode ierr;
1739 
1740   PetscFunctionBegin;
1741   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1742   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 EXTERN_C_END
1746 
1747 EXTERN_C_BEGIN
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1750 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1751 {
1752   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1753   PetscErrorCode ierr;
1754 
1755   PetscFunctionBegin;
1756   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1757   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1758   PetscFunctionReturn(0);
1759 }
1760 EXTERN_C_END
1761 
1762 #include "petscpc.h"
1763 EXTERN_C_BEGIN
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1766 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1767 {
1768   Mat_MPIAIJ     *b;
1769   PetscErrorCode ierr;
1770   PetscInt       i;
1771 
1772   PetscFunctionBegin;
1773   B->preallocated = PETSC_TRUE;
1774   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1775   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1776   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1777   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1778   if (d_nnz) {
1779     for (i=0; i<B->m; i++) {
1780       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1781     }
1782   }
1783   if (o_nnz) {
1784     for (i=0; i<B->m; i++) {
1785       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1786     }
1787   }
1788   b = (Mat_MPIAIJ*)B->data;
1789   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1790   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1791 
1792   PetscFunctionReturn(0);
1793 }
1794 EXTERN_C_END
1795 
1796 #undef __FUNCT__
1797 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1798 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1799 {
1800   Mat            mat;
1801   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1802   PetscErrorCode ierr;
1803 
1804   PetscFunctionBegin;
1805   *newmat       = 0;
1806   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1807   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1808   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1809   a    = (Mat_MPIAIJ*)mat->data;
1810 
1811   mat->factor       = matin->factor;
1812   mat->bs           = matin->bs;
1813   mat->assembled    = PETSC_TRUE;
1814   mat->insertmode   = NOT_SET_VALUES;
1815   mat->preallocated = PETSC_TRUE;
1816 
1817   a->rstart       = oldmat->rstart;
1818   a->rend         = oldmat->rend;
1819   a->cstart       = oldmat->cstart;
1820   a->cend         = oldmat->cend;
1821   a->size         = oldmat->size;
1822   a->rank         = oldmat->rank;
1823   a->donotstash   = oldmat->donotstash;
1824   a->roworiented  = oldmat->roworiented;
1825   a->rowindices   = 0;
1826   a->rowvalues    = 0;
1827   a->getrowactive = PETSC_FALSE;
1828 
1829   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1830   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1831   if (oldmat->colmap) {
1832 #if defined (PETSC_USE_CTABLE)
1833     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1834 #else
1835     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1836     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1837     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1838 #endif
1839   } else a->colmap = 0;
1840   if (oldmat->garray) {
1841     PetscInt len;
1842     len  = oldmat->B->n;
1843     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1844     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1845     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1846   } else a->garray = 0;
1847 
1848   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1849   PetscLogObjectParent(mat,a->lvec);
1850   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1851   PetscLogObjectParent(mat,a->Mvctx);
1852   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1853   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1854   PetscLogObjectParent(mat,a->A);
1855   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1856   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1857   PetscLogObjectParent(mat,a->B);
1858   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1859   *newmat = mat;
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 #include "petscsys.h"
1864 
1865 #undef __FUNCT__
1866 #define __FUNCT__ "MatLoad_MPIAIJ"
1867 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1868 {
1869   Mat            A;
1870   PetscScalar    *vals,*svals;
1871   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1872   MPI_Status     status;
1873   PetscErrorCode ierr;
1874   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1875   PetscInt       i,nz,j,rstart,rend;
1876   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1877   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1878   PetscInt       cend,cstart,n,*rowners;
1879   int            fd;
1880 
1881   PetscFunctionBegin;
1882   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1883   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1884   if (!rank) {
1885     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1886     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1887     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1888   }
1889 
1890   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1891   M = header[1]; N = header[2];
1892   /* determine ownership of all rows */
1893   m    = M/size + ((M % size) > rank);
1894   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1895   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1896   rowners[0] = 0;
1897   for (i=2; i<=size; i++) {
1898     rowners[i] += rowners[i-1];
1899   }
1900   rstart = rowners[rank];
1901   rend   = rowners[rank+1];
1902 
1903   /* distribute row lengths to all processors */
1904   ierr    = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
1905   if (!rank) {
1906     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1907     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1908     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1909     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1910     for (j=0; j<m; j++) {
1911       procsnz[0] += ourlens[j];
1912     }
1913     for (i=1; i<size; i++) {
1914       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1915       /* calculate the number of nonzeros on each processor */
1916       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1917         procsnz[i] += rowlengths[j];
1918       }
1919       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1920     }
1921     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1922   } else {
1923     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1924   }
1925 
1926   if (!rank) {
1927     /* determine max buffer needed and allocate it */
1928     maxnz = 0;
1929     for (i=0; i<size; i++) {
1930       maxnz = PetscMax(maxnz,procsnz[i]);
1931     }
1932     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1933 
1934     /* read in my part of the matrix column indices  */
1935     nz   = procsnz[0];
1936     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1937     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1938 
1939     /* read in everyone else's rows and ship them off */
1940     for (i=1; i<size; i++) {
1941       nz   = procsnz[i];
1942       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1943       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1944     }
1945     ierr = PetscFree(cols);CHKERRQ(ierr);
1946   } else {
1947     /* determine buffer space needed for message */
1948     nz = 0;
1949     for (i=0; i<m; i++) {
1950       nz += ourlens[i];
1951     }
1952     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1953 
1954     /* receive message of column indices */
1955     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1956     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1957     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1958   }
1959 
1960   /* determine column ownership if matrix is not square */
1961   if (N != M) {
1962     n      = N/size + ((N % size) > rank);
1963     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1964     cstart = cend - n;
1965   } else {
1966     cstart = rstart;
1967     cend   = rend;
1968     n      = cend - cstart;
1969   }
1970 
1971   /* loop over local rows, determining number of off diagonal entries */
1972   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1973   jj = 0;
1974   for (i=0; i<m; i++) {
1975     for (j=0; j<ourlens[i]; j++) {
1976       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1977       jj++;
1978     }
1979   }
1980 
1981   /* create our matrix */
1982   for (i=0; i<m; i++) {
1983     ourlens[i] -= offlens[i];
1984   }
1985   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
1986   ierr = MatSetType(A,type);CHKERRQ(ierr);
1987   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
1988 
1989   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1990   for (i=0; i<m; i++) {
1991     ourlens[i] += offlens[i];
1992   }
1993 
1994   if (!rank) {
1995     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1996 
1997     /* read in my part of the matrix numerical values  */
1998     nz   = procsnz[0];
1999     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2000 
2001     /* insert into matrix */
2002     jj      = rstart;
2003     smycols = mycols;
2004     svals   = vals;
2005     for (i=0; i<m; i++) {
2006       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2007       smycols += ourlens[i];
2008       svals   += ourlens[i];
2009       jj++;
2010     }
2011 
2012     /* read in the other processors' parts and ship them out */
2013     for (i=1; i<size; i++) {
2014       nz   = procsnz[i];
2015       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2016       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2017     }
2018     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2019   } else {
2020     /* receive numeric values */
2021     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2022 
2023     /* receive message of values */
2024     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2025     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2026     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2027 
2028     /* insert into matrix */
2029     jj      = rstart;
2030     smycols = mycols;
2031     svals   = vals;
2032     for (i=0; i<m; i++) {
2033       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2034       smycols += ourlens[i];
2035       svals   += ourlens[i];
2036       jj++;
2037     }
2038   }
2039   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2040   ierr = PetscFree(vals);CHKERRQ(ierr);
2041   ierr = PetscFree(mycols);CHKERRQ(ierr);
2042   ierr = PetscFree(rowners);CHKERRQ(ierr);
2043 
2044   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2045   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2046   *newmat = A;
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 #undef __FUNCT__
2051 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2052 /*
2053     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
2054   copy on each process, and then the final result by concatenating the local matrices.
2055   Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
2056 */
2057 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2058 {
2059   PetscErrorCode ierr;
2060   PetscMPIInt    rank,size;
2061   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2062   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2063   Mat            *local,M,Mreuse;
2064   PetscScalar    *vwork,*aa;
2065   MPI_Comm       comm = mat->comm;
2066   Mat_SeqAIJ     *aij;
2067 
2068 
2069   PetscFunctionBegin;
2070   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2071   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2072 
2073   if (call ==  MAT_REUSE_MATRIX) {
2074     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2075     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2076     local = &Mreuse;
2077     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2078   } else {
2079     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2080     Mreuse = *local;
2081     ierr   = PetscFree(local);CHKERRQ(ierr);
2082   }
2083 
2084   /*
2085       m - number of local rows
2086       n - number of columns (same on all processors)
2087       rstart - first row in new global matrix generated
2088   */
2089   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2090   if (call == MAT_INITIAL_MATRIX) {
2091     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2092     ii  = aij->i;
2093     jj  = aij->j;
2094 
2095     /*
2096         Determine the number of non-zeros in the diagonal and off-diagonal
2097         portions of the matrix in order to do correct preallocation
2098     */
2099 
2100     /* first get start and end of "diagonal" columns */
2101     if (csize == PETSC_DECIDE) {
2102       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2103       if (mglobal == n) { /* square matrix */
2104         nlocal = m;
2105       } else {
2106         nlocal = n/size + ((n % size) > rank);
2107       }
2108     } else {
2109       nlocal = csize;
2110     }
2111     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2112     rstart = rend - nlocal;
2113     if (rank == size - 1 && rend != n) {
2114       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2115     }
2116 
2117     /* next, compute all the lengths */
2118     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2119     olens = dlens + m;
2120     for (i=0; i<m; i++) {
2121       jend = ii[i+1] - ii[i];
2122       olen = 0;
2123       dlen = 0;
2124       for (j=0; j<jend; j++) {
2125         if (*jj < rstart || *jj >= rend) olen++;
2126         else dlen++;
2127         jj++;
2128       }
2129       olens[i] = olen;
2130       dlens[i] = dlen;
2131     }
2132     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2133     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2134     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2135     ierr = PetscFree(dlens);CHKERRQ(ierr);
2136   } else {
2137     PetscInt ml,nl;
2138 
2139     M = *newmat;
2140     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2141     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2142     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2143     /*
2144          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2145        rather than the slower MatSetValues().
2146     */
2147     M->was_assembled = PETSC_TRUE;
2148     M->assembled     = PETSC_FALSE;
2149   }
2150   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2151   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2152   ii  = aij->i;
2153   jj  = aij->j;
2154   aa  = aij->a;
2155   for (i=0; i<m; i++) {
2156     row   = rstart + i;
2157     nz    = ii[i+1] - ii[i];
2158     cwork = jj;     jj += nz;
2159     vwork = aa;     aa += nz;
2160     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2161   }
2162 
2163   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2164   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2165   *newmat = M;
2166 
2167   /* save the submatrix used on this process for the next request */
2168   if (call ==  MAT_INITIAL_MATRIX) {
2169     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2170     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2171   }
2172 
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 EXTERN_C_BEGIN
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2179 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2180 {
2181   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2182   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2183   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2184   const PetscInt *JJ;
2185   PetscScalar    *values;
2186   PetscErrorCode ierr;
2187 
2188   PetscFunctionBegin;
2189 #if defined(PETSC_OPT_g)
2190   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2191 #endif
2192   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2193   o_nnz = d_nnz + m;
2194 
2195   for (i=0; i<m; i++) {
2196     nnz     = I[i+1]- I[i];
2197     JJ      = J + I[i];
2198     nnz_max = PetscMax(nnz_max,nnz);
2199 #if defined(PETSC_OPT_g)
2200     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2201 #endif
2202     for (j=0; j<nnz; j++) {
2203       if (*JJ >= cstart) break;
2204       JJ++;
2205     }
2206     d = 0;
2207     for (; j<nnz; j++) {
2208       if (*JJ++ >= cend) break;
2209       d++;
2210     }
2211     d_nnz[i] = d;
2212     o_nnz[i] = nnz - d;
2213   }
2214   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2215   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2216 
2217   if (v) values = (PetscScalar*)v;
2218   else {
2219     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2220     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2221   }
2222 
2223   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2224   for (i=0; i<m; i++) {
2225     ii   = i + rstart;
2226     nnz  = I[i+1]- I[i];
2227     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2228   }
2229   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2230   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2231   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2232 
2233   if (!v) {
2234     ierr = PetscFree(values);CHKERRQ(ierr);
2235   }
2236   PetscFunctionReturn(0);
2237 }
2238 EXTERN_C_END
2239 
2240 #undef __FUNCT__
2241 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2242 /*@C
2243    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2244    (the default parallel PETSc format).
2245 
2246    Collective on MPI_Comm
2247 
2248    Input Parameters:
2249 +  A - the matrix
2250 .  i - the indices into j for the start of each local row (starts with zero)
2251 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2252 -  v - optional values in the matrix
2253 
2254    Level: developer
2255 
2256 .keywords: matrix, aij, compressed row, sparse, parallel
2257 
2258 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2259 @*/
2260 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2261 {
2262   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2263 
2264   PetscFunctionBegin;
2265   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2266   if (f) {
2267     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2268   }
2269   PetscFunctionReturn(0);
2270 }
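
/*
   Illustrative usage sketch (an addition for exposition, not part of the library source):
   preallocating and filling an MPIAIJ matrix from local CSR arrays. The arrays ia, ja, va
   and the sizes (2 local rows, N global columns) are hypothetical; ja must be sorted
   within each row.

     PetscInt    ia[3] = {0,2,4};                 row starts for 2 local rows
     PetscInt    ja[4] = {0,3,1,4};               global column indices, sorted per row
     PetscScalar va[4] = {1.0,2.0,3.0,4.0};
     Mat         A;

     ierr = MatCreate(comm,2,PETSC_DECIDE,PETSC_DETERMINE,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(A,ia,ja,va);CHKERRQ(ierr);
*/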
2271 
2272 #undef __FUNCT__
2273 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2274 /*@C
2275    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2276    (the default parallel PETSc format).  For good matrix assembly performance
2277    the user should preallocate the matrix storage by setting the parameters
2278    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2279    performance can be increased by more than a factor of 50.
2280 
2281    Collective on MPI_Comm
2282 
2283    Input Parameters:
2284 +  A - the matrix
2285 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2286            (same value is used for all local rows)
2287 .  d_nnz - array containing the number of nonzeros in the various rows of the
2288            DIAGONAL portion of the local submatrix (possibly different for each row)
2289            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2290            The size of this array is equal to the number of local rows, i.e. 'm'.
2291            You must leave room for the diagonal entry even if it is zero.
2292 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2293            submatrix (same value is used for all local rows).
2294 -  o_nnz - array containing the number of nonzeros in the various rows of the
2295            OFF-DIAGONAL portion of the local submatrix (possibly different for
2296            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2297            structure. The size of this array is equal to the number
2298            of local rows, i.e. 'm'.
2299 
2300    If the *_nnz parameter is given then the *_nz parameter is ignored
2301 
2302    The AIJ format (also called the Yale sparse matrix format or
2303    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2304    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2305 
2306    The parallel matrix is partitioned such that the first m0 rows belong to
2307    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2308    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2309 
2310    The DIAGONAL portion of the local submatrix of a processor can be defined
2311    as the submatrix which is obtained by extracting the part corresponding
2312    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2313    first row that belongs to the processor, and r2 is the last row belonging
2314    to this processor. This is a square mxm matrix. The remaining portion
2315    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2316 
2317    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2318 
2319    Example usage:
2320 
2321    Consider the following 8x8 matrix with 34 non-zero values, that is
2322    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2323    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2324    as follows:
2325 
2326 .vb
2327             1  2  0  |  0  3  0  |  0  4
2328     Proc0   0  5  6  |  7  0  0  |  8  0
2329             9  0 10  | 11  0  0  | 12  0
2330     -------------------------------------
2331            13  0 14  | 15 16 17  |  0  0
2332     Proc1   0 18  0  | 19 20 21  |  0  0
2333             0  0  0  | 22 23  0  | 24  0
2334     -------------------------------------
2335     Proc2  25 26 27  |  0  0 28  | 29  0
2336            30  0  0  | 31 32 33  |  0 34
2337 .ve
2338 
2339    This can be represented as a collection of submatrices as:
2340 
2341 .vb
2342       A B C
2343       D E F
2344       G H I
2345 .ve
2346 
2347    Where the submatrices A,B,C are owned by proc0, D,E,F are
2348    owned by proc1, G,H,I are owned by proc2.
2349 
2350    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2351    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2352    The 'M','N' parameters are 8,8, and have the same values on all procs.
2353 
2354    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2355    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2356    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2357    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2358    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2359    matrix, and [DF] as another SeqAIJ matrix.
2360 
2361    When d_nz, o_nz parameters are specified, d_nz storage elements are
2362    allocated for every row of the local diagonal submatrix, and o_nz
2363    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2364    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2365    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2366    In this case, the values of d_nz,o_nz are:
2367 .vb
2368      proc0 : d_nz = 2, o_nz = 2
2369      proc1 : d_nz = 3, o_nz = 2
2370      proc2 : d_nz = 1, o_nz = 4
2371 .ve
2372    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2373    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2374    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2375    34 values.
2376 
2377    When d_nnz, o_nnz parameters are specified, the storage is specified
2378    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2379    In the above case the values for d_nnz,o_nnz are:
2380 .vb
2381      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2382      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2383      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2384 .ve
2385    Here the space allocated is the sum of all the above values, i.e. 34, and
2386    hence the preallocation is exact.
2387 
2388    Level: intermediate
2389 
2390 .keywords: matrix, aij, compressed row, sparse, parallel
2391 
2392 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2393           MPIAIJ
2394 @*/
2395 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2396 {
2397   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2398 
2399   PetscFunctionBegin;
2400   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2401   if (f) {
2402     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2403   }
2404   PetscFunctionReturn(0);
2405 }
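
/*
   Illustrative usage sketch (an addition for exposition, not part of the library source):
   the call that proc0 of the 8x8 example above might make, using the exact per-row counts
   d_nnz = [2,2,2] and o_nnz = [2,2,2]. The matrix A is assumed to have already been
   created with MatCreate()/MatSetType() and a local row count of 3 on this process;
   the d_nz and o_nz arguments are ignored when the arrays are supplied.

     PetscInt d_nnz[3] = {2,2,2};
     PetscInt o_nnz[3] = {2,2,2};

     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/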
2406 
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatCreateMPIAIJ"
2409 /*@C
2410    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2411    (the default parallel PETSc format).  For good matrix assembly performance
2412    the user should preallocate the matrix storage by setting the parameters
2413    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2414    performance can be increased by more than a factor of 50.
2415 
2416    Collective on MPI_Comm
2417 
2418    Input Parameters:
2419 +  comm - MPI communicator
2420 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2421            This value should be the same as the local size used in creating the
2422            y vector for the matrix-vector product y = Ax.
2423 .  n - This value should be the same as the local size used in creating the
2424        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2425        calculated if N is given). For square matrices n is almost always m.
2426 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2427 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2428 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2429            (same value is used for all local rows)
2430 .  d_nnz - array containing the number of nonzeros in the various rows of the
2431            DIAGONAL portion of the local submatrix (possibly different for each row)
2432            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2433            The size of this array is equal to the number of local rows, i.e. 'm'.
2434            You must leave room for the diagonal entry even if it is zero.
2435 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2436            submatrix (same value is used for all local rows).
2437 -  o_nnz - array containing the number of nonzeros in the various rows of the
2438            OFF-DIAGONAL portion of the local submatrix (possibly different for
2439            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2440            structure. The size of this array is equal to the number
2441            of local rows, i.e. 'm'.
2442 
2443    Output Parameter:
2444 .  A - the matrix
2445 
2446    Notes:
2447    If the *_nnz parameter is given then the *_nz parameter is ignored
2448 
2449    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2450    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2451    storage requirements for this matrix.
2452 
2453    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2454    processor then it must be used on all processors that share the object for
2455    that argument.
2456 
2457    The user MUST specify either the local or global matrix dimensions
2458    (possibly both).
2459 
2460    The parallel matrix is partitioned such that the first m0 rows belong to
2461    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2462    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2463 
2464    The DIAGONAL portion of the local submatrix of a processor can be defined
2465    as the submatrix which is obtained by extracting the part corresponding
2466    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2467    first row that belongs to the processor, and r2 is the last row belonging
2468    to this processor. This is a square mxm matrix. The remaining portion
2469    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2470 
2471    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2472 
2473    When calling this routine with a single process communicator, a matrix of
2474    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2475    type of communicator, use the construction mechanism:
2476      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2477 
2478    By default, this format uses inodes (identical nodes) when possible.
2479    We search for consecutive rows with the same nonzero structure, thereby
2480    reusing matrix information to achieve increased efficiency.
2481 
2482    Options Database Keys:
2483 +  -mat_aij_no_inode  - Do not use inodes
2484 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2485 -  -mat_aij_oneindex - Internally use indexing starting at 1
2486         rather than 0.  Note that when calling MatSetValues(),
2487         the user still MUST index entries starting at 0!
2488 
2489 
2490    Example usage:
2491 
2492    Consider the following 8x8 matrix with 34 non-zero values, that is
2493    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2494    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2495    as follows:
2496 
2497 .vb
2498             1  2  0  |  0  3  0  |  0  4
2499     Proc0   0  5  6  |  7  0  0  |  8  0
2500             9  0 10  | 11  0  0  | 12  0
2501     -------------------------------------
2502            13  0 14  | 15 16 17  |  0  0
2503     Proc1   0 18  0  | 19 20 21  |  0  0
2504             0  0  0  | 22 23  0  | 24  0
2505     -------------------------------------
2506     Proc2  25 26 27  |  0  0 28  | 29  0
2507            30  0  0  | 31 32 33  |  0 34
2508 .ve
2509 
2510    This can be represented as a collection of submatrices as:
2511 
2512 .vb
2513       A B C
2514       D E F
2515       G H I
2516 .ve
2517 
2518    Where the submatrices A,B,C are owned by proc0, D,E,F are
2519    owned by proc1, G,H,I are owned by proc2.
2520 
2521    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2522    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2523    The 'M','N' parameters are 8,8, and have the same values on all procs.
2524 
2525    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2526    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2527    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2528    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2529    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2530    matrix, and [DF] as another SeqAIJ matrix.
2531 
2532    When d_nz, o_nz parameters are specified, d_nz storage elements are
2533    allocated for every row of the local diagonal submatrix, and o_nz
2534    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2535    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2536    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2537    In this case, the values of d_nz,o_nz are:
2538 .vb
2539      proc0 : d_nz = 2, o_nz = 2
2540      proc1 : d_nz = 3, o_nz = 2
2541      proc2 : d_nz = 1, o_nz = 4
2542 .ve
2543    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2544    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2545    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2546    34 values.
2547 
2548    When d_nnz, o_nnz parameters are specified, the storage is specified
2549    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2550    In the above case the values for d_nnz,o_nnz are:
2551 .vb
2552      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2553      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2554      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2555 .ve
2556    Here the space allocated is the sum of all the above values, i.e. 34, and
2557    hence the preallocation is exact.
2558 
2559    Level: intermediate
2560 
2561 .keywords: matrix, aij, compressed row, sparse, parallel
2562 
2563 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2564           MPIAIJ
2565 @*/
2566 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2567 {
2568   PetscErrorCode ierr;
2569   PetscMPIInt    size;
2570 
2571   PetscFunctionBegin;
2572   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2573   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2574   if (size > 1) {
2575     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2576     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2577   } else {
2578     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2579     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2580   }
2581   PetscFunctionReturn(0);
2582 }
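
/*
   Illustrative usage sketch (an addition for exposition, not part of the library source):
   creating and assembling a parallel AIJ matrix with MatCreateMPIAIJ(). The global size N,
   the preallocation guesses 5 and 2, and the helper ComputeRowEntries(), which fills
   cols[] and vals[] for one row and returns the count, are hypothetical.

     Mat         A;
     PetscInt    Istart,Iend,row,ncols,cols[5];
     PetscScalar vals[5];

     ierr = MatCreateMPIAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,N,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
     for (row=Istart; row<Iend; row++) {
       ncols = ComputeRowEntries(row,cols,vals);
       ierr  = MatSetValues(A,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/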
2583 
2584 #undef __FUNCT__
2585 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2586 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2587 {
2588   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2589 
2590   PetscFunctionBegin;
2591   *Ad     = a->A;
2592   *Ao     = a->B;
2593   *colmap = a->garray;
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 #undef __FUNCT__
2598 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2599 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2600 {
2601   PetscErrorCode ierr;
2602   PetscInt       i;
2603   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2604 
2605   PetscFunctionBegin;
2606   if (coloring->ctype == IS_COLORING_LOCAL) {
2607     ISColoringValue *allcolors,*colors;
2608     ISColoring      ocoloring;
2609 
2610     /* set coloring for diagonal portion */
2611     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2612 
2613     /* set coloring for off-diagonal portion */
2614     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2615     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2616     for (i=0; i<a->B->n; i++) {
2617       colors[i] = allcolors[a->garray[i]];
2618     }
2619     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2620     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2621     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2622     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2623   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2624     ISColoringValue *colors;
2625     PetscInt             *larray;
2626     ISColoring      ocoloring;
2627 
2628     /* set coloring for diagonal portion */
2629     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2630     for (i=0; i<a->A->n; i++) {
2631       larray[i] = i + a->cstart;
2632     }
2633     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2634     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2635     for (i=0; i<a->A->n; i++) {
2636       colors[i] = coloring->colors[larray[i]];
2637     }
2638     ierr = PetscFree(larray);CHKERRQ(ierr);
2639     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2640     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2641     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2642 
2643     /* set coloring for off-diagonal portion */
2644     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2645     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2646     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2647     for (i=0; i<a->B->n; i++) {
2648       colors[i] = coloring->colors[larray[i]];
2649     }
2650     ierr = PetscFree(larray);CHKERRQ(ierr);
2651     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2652     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2653     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2654   } else {
2655     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
2656   }
2657 
2658   PetscFunctionReturn(0);
2659 }
2660 
2661 #if defined(PETSC_HAVE_ADIC)
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2664 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2665 {
2666   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2667   PetscErrorCode ierr;
2668 
2669   PetscFunctionBegin;
2670   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2671   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2672   PetscFunctionReturn(0);
2673 }
2674 #endif
2675 
2676 #undef __FUNCT__
2677 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2678 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2679 {
2680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2681   PetscErrorCode ierr;
2682 
2683   PetscFunctionBegin;
2684   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2685   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2686   PetscFunctionReturn(0);
2687 }
2688 
2689 #undef __FUNCT__
2690 #define __FUNCT__ "MatMerge"
2691 /*@C
2692       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2693                  matrices from each processor
2694 
2695     Collective on MPI_Comm
2696 
2697    Input Parameters:
2698 +    comm - the communicator the parallel matrix will live on
2699 .    inmat - the input sequential matrix (one per process)
2700 .    n - number of local columns (or PETSC_DECIDE)
2701 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2702 
2703    Output Parameter:
2704 .    outmat - the parallel matrix generated
2705 
2706     Level: advanced
2707 
2708    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2709 
2710 @*/
2711 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2712 {
2713   PetscErrorCode ierr;
2714   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2715   PetscInt       *indx;
2716   PetscScalar    *values;
2717   PetscMap       columnmap,rowmap;
2718 
2719   PetscFunctionBegin;
2720   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2721   /*
2722   PetscMPIInt       rank;
2723   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2724   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2725   */
2726   if (scall == MAT_INITIAL_MATRIX){
2727     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2728     if (n == PETSC_DECIDE){
2729       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2730       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2731       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2732       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2733       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2734     }
2735 
2736     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2737     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2738     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2739     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2740     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2741 
2742     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2743     for (i=0;i<m;i++) {
2744       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2745       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2746       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2747     }
2748     /* This routine will ONLY return an MPIAIJ type matrix */
2749     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2750     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2751     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2752     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2753 
2754   } else if (scall == MAT_REUSE_MATRIX){
2755     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2756   } else {
2757     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2758   }
2759 
2760   for (i=0;i<m;i++) {
2761     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2762     I    = i + rstart;
2763     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2764     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2765   }
2766   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2767   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2768   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2769 
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 #undef __FUNCT__
2774 #define __FUNCT__ "MatFileSplit"
2775 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2776 {
2777   PetscErrorCode    ierr;
2778   PetscMPIInt       rank;
2779   PetscInt          m,N,i,rstart,nnz;
2780   size_t            len;
2781   const PetscInt    *indx;
2782   PetscViewer       out;
2783   char              *name;
2784   Mat               B;
2785   const PetscScalar *values;
2786 
2787   PetscFunctionBegin;
2788   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2789   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2790   /* Should this be the type of the diagonal block of A? */
2791   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2792   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2793   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2794   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2795   for (i=0;i<m;i++) {
2796     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2797     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2798     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2799   }
2800   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2801   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2802 
2803   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2804   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2805   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2806   sprintf(name,"%s.%d",outfile,rank);
2807   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2808   ierr = PetscFree(name);
2809   ierr = MatView(B,out);CHKERRQ(ierr);
2810   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2811   ierr = MatDestroy(B);CHKERRQ(ierr);
2812   PetscFunctionReturn(0);
2813 }
2814 
2815 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2816 #undef __FUNCT__
2817 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2818 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2819 {
2820   PetscErrorCode       ierr;
2821   Mat_Merge_SeqsToMPI  *merge;
2822   PetscObjectContainer container;
2823 
2824   PetscFunctionBegin;
2825   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2826   if (container) {
2827     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2828     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2829     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2830     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2831     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2832     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2833     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2834     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2835     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2836     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2837     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2838     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2839 
2840     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2841     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2842     ierr = PetscFree(merge);CHKERRQ(ierr);
2843   }
2844 
2845   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2846   PetscFunctionReturn(0);
2847 }
2848 
2849 #include "src/mat/utils/freespace.h"
2850 #include "petscbt.h"
2851 #undef __FUNCT__
2852 #define __FUNCT__ "MatMerge_SeqsToMPI"
2853 /*@C
2854       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2855                  matrices from each processor
2856 
2857     Collective on MPI_Comm
2858 
2859    Input Parameters:
2860 +    comm - the communicator the parallel matrix will live on
2861 .    seqmat - the input sequential matrix (one per processor)
2862 .    m - number of local rows (or PETSC_DECIDE)
2863 .    n - number of local columns (or PETSC_DECIDE)
2864 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2865 
2866    Output Parameter:
2867 .    mpimat - the parallel matrix generated
2868 
2869     Level: advanced
2870 
2871    Notes:
2872      The dimensions of the sequential matrix MUST be the same on each processor.
2873      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2874      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2875 @*/
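/*
   Editor's note: a minimal usage sketch, not part of the original source; the
   names seqA and C are illustrative. MAT_INITIAL_MATRIX performs the symbolic
   and numeric phases; MAT_REUSE_MATRIX repeats only the numeric phase, so the
   nonzero pattern of seqmat must be unchanged.

      Mat seqA,C;
      ierr = MatMerge_SeqsToMPI(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ... change the numerical values (not the pattern) stored in seqA ...
      ierr = MatMerge_SeqsToMPI(comm,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
      ierr = MatDestroy(C);CHKERRQ(ierr);
*/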
2876 static PetscEvent logkey_seqstompinum = 0;
2877 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2878 {
2879   PetscErrorCode       ierr;
2880   MPI_Comm             comm=mpimat->comm;
2881   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2882   PetscMPIInt          size,rank,taga,*len_s;
2883   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2884   PetscInt             proc,m;
2885   PetscInt             **buf_ri,**buf_rj;
2886   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2887   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2888   MPI_Request          *s_waits,*r_waits;
2889   MPI_Status           *status;
2890   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2891   Mat_Merge_SeqsToMPI  *merge;
2892   PetscObjectContainer container;
2893 
2894   PetscFunctionBegin;
2895   if (!logkey_seqstompinum) {
2896     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2897   }
2898   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2899 
2900   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2901   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2902 
2903   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2904   if (container) {
2905     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2906   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have an attached MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
2907   bi     = merge->bi;
2908   bj     = merge->bj;
2909   buf_ri = merge->buf_ri;
2910   buf_rj = merge->buf_rj;
2911 
2912   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2913   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2914   len_s  = merge->len_s;
2915 
2916   /* send and recv matrix values */
2917   /*-----------------------------*/
2918   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2919   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2920 
2921   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2922   for (proc=0,k=0; proc<size; proc++){
2923     if (!len_s[proc]) continue;
2924     i = owners[proc];
2925     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2926     k++;
2927   }
2928 
2929   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2930   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2931   ierr = PetscFree(status);CHKERRQ(ierr);
2932 
2933   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2934   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2935 
2936   /* insert mat values of mpimat */
2937   /*----------------------------*/
2938   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2939   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2940   nextrow = buf_ri_k + merge->nrecv;
2941   nextai  = nextrow + merge->nrecv;
2942 
2943   for (k=0; k<merge->nrecv; k++){
2944     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2945     nrows = *(buf_ri_k[k]);
2946     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2947     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
2948   }
2949 
2950   /* set values of ba */
2951   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2952   for (i=0; i<m; i++) {
2953     arow = owners[rank] + i;
2954     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2955     bnzi = bi[i+1] - bi[i];
2956     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2957 
2958     /* add local non-zero vals of this proc's seqmat into ba */
2959     anzi = ai[arow+1] - ai[arow];
2960     aj   = a->j + ai[arow];
2961     aa   = a->a + ai[arow];
2962     nextaj = 0;
2963     for (j=0; nextaj<anzi; j++){
2964       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2965         ba_i[j] += aa[nextaj++];
2966       }
2967     }
2968 
2969     /* add received vals into ba */
2970     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2971       /* i-th row */
2972       if (i == *nextrow[k]) {
2973         anzi = *(nextai[k]+1) - *nextai[k];
2974         aj   = buf_rj[k] + *(nextai[k]);
2975         aa   = abuf_r[k] + *(nextai[k]);
2976         nextaj = 0;
2977         for (j=0; nextaj<anzi; j++){
2978           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2979             ba_i[j] += aa[nextaj++];
2980           }
2981         }
2982         nextrow[k]++; nextai[k]++;
2983       }
2984     }
2985     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
2986   }
2987   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2988   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2989 
2990   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
2991   ierr = PetscFree(ba_i);CHKERRQ(ierr);
2992   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
2993   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2994   PetscFunctionReturn(0);
2995 }
2996 static PetscEvent logkey_seqstompisym = 0;
2997 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
2998 {
2999   PetscErrorCode       ierr;
3000   Mat                  B_mpi;
3001   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3002   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3003   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3004   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3005   PetscInt             len,proc,*dnz,*onz;
3006   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3007   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3008   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3009   MPI_Status           *status;
3010   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3011   PetscBT              lnkbt;
3012   Mat_Merge_SeqsToMPI  *merge;
3013   PetscObjectContainer container;
3014 
3015   PetscFunctionBegin;
3016   if (!logkey_seqstompisym) {
3017     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3018   }
3019   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3020 
3021   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3022   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3023 
3024   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3025   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3026 
3027   /* determine row ownership */
3028   /*---------------------------------------------------------*/
3029   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3030   if (m == PETSC_DECIDE) {
3031     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3032   } else {
3033     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3034   }
3035   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3036   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3037   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3038 
3039   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3040   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3041 
3042   /* determine the number of messages to send, their lengths */
3043   /*---------------------------------------------------------*/
3044   len_s  = merge->len_s;
3045 
3046   len = 0;  /* length of buf_si[] */
3047   merge->nsend = 0;
3048   for (proc=0; proc<size; proc++){
3049     len_si[proc] = 0;
3050     if (proc == rank){
3051       len_s[proc] = 0;
3052     } else {
3053       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3054       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3055     }
3056     if (len_s[proc]) {
3057       merge->nsend++;
3058       nrows = 0;
3059       for (i=owners[proc]; i<owners[proc+1]; i++){
3060         if (ai[i+1] > ai[i]) nrows++;
3061       }
3062       len_si[proc] = 2*(nrows+1);
3063       len += len_si[proc];
3064     }
3065   }
3066 
3067   /* determine the number and length of messages to receive for ij-structure */
3068   /*-------------------------------------------------------------------------*/
3069   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3070   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3071 
3072   /* post the Irecv of j-structure */
3073   /*-------------------------------*/
3074   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3075   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3076 
3077   /* post the Isend of j-structure */
3078   /*--------------------------------*/
3079   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3080   sj_waits = si_waits + merge->nsend;
3081 
3082   for (proc=0, k=0; proc<size; proc++){
3083     if (!len_s[proc]) continue;
3084     i = owners[proc];
3085     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3086     k++;
3087   }
3088 
3089   /* receives and sends of j-structure are complete */
3090   /*------------------------------------------------*/
3091   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3092   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3093 
3094   /* send and recv i-structure */
3095   /*---------------------------*/
3096   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3097   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3098 
3099   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3100   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3101   for (proc=0,k=0; proc<size; proc++){
3102     if (!len_s[proc]) continue;
3103     /* form outgoing message for i-structure:
3104          buf_si[0]:                 nrows to be sent
3105                [1:nrows]:           row index (local to the receiving process)
3106                [nrows+1:2*nrows+1]: i-structure index
3107     */
3108     /*-------------------------------------------*/
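    /* Editor's note (worked example, not in the original source): if only rows 7 and 9
       (local to the receiving process) are nonempty, with 3 and 5 nonzeros respectively,
       then nrows = 2, len_si[proc] = 6 and buf_si = {2, 7, 9, 0, 3, 8}. */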
3109     nrows = len_si[proc]/2 - 1;
3110     buf_si_i    = buf_si + nrows+1;
3111     buf_si[0]   = nrows;
3112     buf_si_i[0] = 0;
3113     nrows = 0;
3114     for (i=owners[proc]; i<owners[proc+1]; i++){
3115       anzi = ai[i+1] - ai[i];
3116       if (anzi) {
3117         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3118         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3119         nrows++;
3120       }
3121     }
3122     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3123     k++;
3124     buf_si += len_si[proc];
3125   }
3126 
3127   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3128   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3129 
3130   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3131   for (i=0; i<merge->nrecv; i++){
3132     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3133   }
3134 
3135   ierr = PetscFree(len_si);CHKERRQ(ierr);
3136   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3137   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3138   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3139   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3140   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3141   ierr = PetscFree(status);CHKERRQ(ierr);
3142 
3143   /* compute a local seq matrix in each processor */
3144   /*----------------------------------------------*/
3145   /* allocate bi array and free space for accumulating nonzero column info */
3146   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3147   bi[0] = 0;
3148 
3149   /* create and initialize a linked list */
3150   nlnk = N+1;
3151   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3152 
3153   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3154   len = 0;
3155   len  = ai[owners[rank+1]] - ai[owners[rank]];
3156   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3157   current_space = free_space;
3158 
3159   /* determine symbolic info for each local row */
3160   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3161   nextrow = buf_ri_k + merge->nrecv;
3162   nextai  = nextrow + merge->nrecv;
3163   for (k=0; k<merge->nrecv; k++){
3164     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3165     nrows = *buf_ri_k[k];
3166     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3167     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3168   }
3169 
3170   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3171   len = 0;
3172   for (i=0;i<m;i++) {
3173     bnzi   = 0;
3174     /* add local non-zero cols of this proc's seqmat into lnk */
3175     arow   = owners[rank] + i;
3176     anzi   = ai[arow+1] - ai[arow];
3177     aj     = a->j + ai[arow];
3178     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3179     bnzi += nlnk;
3180     /* add received col data into lnk */
3181     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3182       if (i == *nextrow[k]) { /* i-th row */
3183         anzi = *(nextai[k]+1) - *nextai[k];
3184         aj   = buf_rj[k] + *nextai[k];
3185         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3186         bnzi += nlnk;
3187         nextrow[k]++; nextai[k]++;
3188       }
3189     }
3190     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3191 
3192     /* if free space is not available, make more free space */
3193     if (current_space->local_remaining<bnzi) {
3194       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3195       nspacedouble++;
3196     }
3197     /* copy data into free space, then initialize lnk */
3198     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3199     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3200 
3201     current_space->array           += bnzi;
3202     current_space->local_used      += bnzi;
3203     current_space->local_remaining -= bnzi;
3204 
3205     bi[i+1] = bi[i] + bnzi;
3206   }
3207 
3208   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3209 
3210   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3211   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3212   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3213 
3214   /* create symbolic parallel matrix B_mpi */
3215   /*---------------------------------------*/
3216   if (n==PETSC_DECIDE) {
3217     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3218   } else {
3219     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3220   }
3221   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3222   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3223   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3224 
3225   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3226   B_mpi->assembled     = PETSC_FALSE;
3227   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3228   merge->bi            = bi;
3229   merge->bj            = bj;
3230   merge->buf_ri        = buf_ri;
3231   merge->buf_rj        = buf_rj;
3232   merge->coi           = PETSC_NULL;
3233   merge->coj           = PETSC_NULL;
3234   merge->owners_co     = PETSC_NULL;
3235 
3236   /* attach the supporting struct to B_mpi for reuse */
3237   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3238   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3239   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3240   *mpimat = B_mpi;
3241   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3242   PetscFunctionReturn(0);
3243 }
3244 
3245 static PetscEvent logkey_seqstompi = 0;
3246 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3247 {
3248   PetscErrorCode   ierr;
3249 
3250   PetscFunctionBegin;
3251   if (!logkey_seqstompi) {
3252     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3253   }
3254   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3255   if (scall == MAT_INITIAL_MATRIX){
3256     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3257   }
3258   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3259   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3260   PetscFunctionReturn(0);
3261 }
3262 static PetscEvent logkey_getlocalmat = 0;
3263 #undef __FUNCT__
3264 #define __FUNCT__ "MatGetLocalMat"
3265 /*@C
3266      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of the parallel matrix A
3267 
3268     Not Collective
3269 
3270    Input Parameters:
3271 +    A - the matrix
3272 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3273 
3274    Output Parameter:
3275 .    A_loc - the local sequential matrix generated
3276 
3277     Level: developer
3278 
3279 @*/
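/*
   Editor's note: a minimal usage sketch, not part of the original source; Aloc is an
   illustrative name. The result is a SeqAIJ matrix with A->m rows and A->N columns.

      Mat Aloc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
      ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);   (refreshes the values only)
      ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/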
3280 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3281 {
3282   PetscErrorCode  ierr;
3283   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3284   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3285   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3286   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3287   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3288   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3289 
3290   PetscFunctionBegin;
3291   if (!logkey_getlocalmat) {
3292     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3293   }
3294   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3295   if (scall == MAT_INITIAL_MATRIX){
3296     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3297     ci[0] = 0;
3298     for (i=0; i<am; i++){
3299       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3300     }
3301     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3302     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3303     k = 0;
3304     for (i=0; i<am; i++) {
3305       ncols_o = bi[i+1] - bi[i];
3306       ncols_d = ai[i+1] - ai[i];
3307       /* off-diagonal portion of A */
3308       for (jo=0; jo<ncols_o; jo++) {
3309         col = cmap[*bj];
3310         if (col >= cstart) break;
3311         cj[k]   = col; bj++;
3312         ca[k++] = *ba++;
3313       }
3314       /* diagonal portion of A */
3315       for (j=0; j<ncols_d; j++) {
3316         cj[k]   = cstart + *aj++;
3317         ca[k++] = *aa++;
3318       }
3319       /* off-diagonal portion of A */
3320       for (j=jo; j<ncols_o; j++) {
3321         cj[k]   = cmap[*bj++];
3322         ca[k++] = *ba++;
3323       }
3324     }
3325     /* put together the new matrix */
3326     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3327     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3328     /* Since these are PETSc arrays, change flags to free them as necessary. */
3329     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3330     mat->freedata = PETSC_TRUE;
3331     mat->nonew    = 0;
3332   } else if (scall == MAT_REUSE_MATRIX){
3333     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3334     ci = mat->i; cj = mat->j; ca = mat->a;
3335     for (i=0; i<am; i++) {
3336       /* off-diagonal portion of A */
3337       ncols_o = bi[i+1] - bi[i];
3338       for (jo=0; jo<ncols_o; jo++) {
3339         col = cmap[*bj];
3340         if (col >= cstart) break;
3341         *ca++ = *ba++; bj++;
3342       }
3343       /* diagonal portion of A */
3344       ncols_d = ai[i+1] - ai[i];
3345       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3346       /* off-diagonal portion of A */
3347       for (j=jo; j<ncols_o; j++) {
3348         *ca++ = *ba++; bj++;
3349       }
3350     }
3351   } else {
3352     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3353   }
3354 
3355   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3356   PetscFunctionReturn(0);
3357 }
3358 
3359 static PetscEvent logkey_getlocalmatcondensed = 0;
3360 #undef __FUNCT__
3361 #define __FUNCT__ "MatGetLocalMatCondensed"
3362 /*@C
3363      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A, keeping only its NON-ZERO columns
3364 
3365     Not Collective
3366 
3367    Input Parameters:
3368 +    A - the matrix
3369 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3370 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3371 
3372    Output Parameter:
3373 .    A_loc - the local sequential matrix generated
3374 
3375     Level: developer
3376 
3377 @*/
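/*
   Editor's note: a minimal usage sketch, not part of the original source; Aloc is an
   illustrative name. Passing PETSC_NULL for row and col selects all local rows and the
   nonzero columns of the local part of A.

      Mat Aloc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
      ...
      ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/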
3378 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3379 {
3380   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3381   PetscErrorCode    ierr;
3382   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3383   IS                isrowa,iscola;
3384   Mat               *aloc;
3385 
3386   PetscFunctionBegin;
3387   if (!logkey_getlocalmatcondensed) {
3388     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3389   }
3390   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3391   if (!row){
3392     start = a->rstart; end = a->rend;
3393     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3394   } else {
3395     isrowa = *row;
3396   }
3397   if (!col){
3398     start = a->cstart;
3399     cmap  = a->garray;
3400     nzA   = a->A->n;
3401     nzB   = a->B->n;
3402     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3403     ncols = 0;
3404     for (i=0; i<nzB; i++) {
3405       if (cmap[i] < start) idx[ncols++] = cmap[i];
3406       else break;
3407     }
3408     imark = i;
3409     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3410     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3411     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3412     ierr = PetscFree(idx);CHKERRQ(ierr);
3413   } else {
3414     iscola = *col;
3415   }
3416   if (scall != MAT_INITIAL_MATRIX){
3417     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3418     aloc[0] = *A_loc;
3419   }
3420   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3421   *A_loc = aloc[0];
3422   ierr = PetscFree(aloc);CHKERRQ(ierr);
3423   if (!row){
3424     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3425   }
3426   if (!col){
3427     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3428   }
3429   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3430   PetscFunctionReturn(0);
3431 }
3432 
3433 static PetscEvent logkey_GetBrowsOfAcols = 0;
3434 #undef __FUNCT__
3435 #define __FUNCT__ "MatGetBrowsOfAcols"
3436 /*@C
3437     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
3438 
3439     Collective on Mat
3440 
3441    Input Parameters:
3442 +    A,B - the matrices in mpiaij format
3443 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3444 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3445 
3446    Output Parameter:
3447 +    rowb, colb - index sets of rows and columns of B to extract
3448 .    brstart - row index of B_seq at which B's own local rows begin (the next B->m rows of B_seq come from B's local rows)
3449 -    B_seq - the sequential matrix generated
3450 
3451     Level: developer
3452 
3453 @*/
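/*
   Editor's note: a minimal usage sketch, not part of the original source; the names
   isrow, iscol, brstart and Bseq are illustrative. The index sets created by the first
   call are returned so they can be passed back in with MAT_REUSE_MATRIX.

      IS       isrow,iscol;
      PetscInt brstart;
      Mat      Bseq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&isrow,&iscol,&brstart,&Bseq);CHKERRQ(ierr);
      ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&isrow,&iscol,&brstart,&Bseq);CHKERRQ(ierr);
      ierr = ISDestroy(isrow);CHKERRQ(ierr);
      ierr = ISDestroy(iscol);CHKERRQ(ierr);
      ierr = MatDestroy(Bseq);CHKERRQ(ierr);
*/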
3454 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3455 {
3456   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3457   PetscErrorCode    ierr;
3458   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3459   IS                isrowb,iscolb;
3460   Mat               *bseq;
3461 
3462   PetscFunctionBegin;
3463   if (a->cstart != b->rstart || a->cend != b->rend){
3464     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3465   }
3466   if (!logkey_GetBrowsOfAcols) {
3467     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3468   }
3469   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3470 
3471   if (scall == MAT_INITIAL_MATRIX){
3472     start = a->cstart;
3473     cmap  = a->garray;
3474     nzA   = a->A->n;
3475     nzB   = a->B->n;
3476     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3477     ncols = 0;
3478     for (i=0; i<nzB; i++) {  /* row < local row index */
3479       if (cmap[i] < start) idx[ncols++] = cmap[i];
3480       else break;
3481     }
3482     imark = i;
3483     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3484     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3485     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3486     ierr = PetscFree(idx);CHKERRQ(ierr);
3487     *brstart = imark;
3488     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3489   } else {
3490     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3491     isrowb = *rowb; iscolb = *colb;
3492     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3493     bseq[0] = *B_seq;
3494   }
3495   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3496   *B_seq = bseq[0];
3497   ierr = PetscFree(bseq);CHKERRQ(ierr);
3498   if (!rowb){
3499     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3500   } else {
3501     *rowb = isrowb;
3502   }
3503   if (!colb){
3504     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3505   } else {
3506     *colb = iscolb;
3507   }
3508   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3509   PetscFunctionReturn(0);
3510 }
3511 
3512 static PetscEvent logkey_GetBrowsOfAocols = 0;
3513 #undef __FUNCT__
3514 #define __FUNCT__ "MatGetBrowsOfAoCols"
3515 /*@C
3516     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3517     of the OFF-DIAGONAL portion of the local part of A
3518 
3519     Collective on Mat
3520 
3521    Input Parameters:
3522 +    A,B - the matrices in mpiaij format
3523 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3524 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3525 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3526 
3527    Output Parameter:
3528 .    B_oth - the sequential matrix generated
3529 
3530     Level: developer
3531 
3532 @*/
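/*
   Editor's note: a minimal usage sketch, not part of the original source; the names
   startsj, bufa and Both are illustrative. Keeping startsj and bufa between calls lets
   MAT_REUSE_MATRIX skip the symbolic (i- and j-array) communication.

      PetscInt    *startsj;
      PetscScalar *bufa;
      Mat         Both;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
      ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
      ... free startsj and bufa with PetscFree() when no longer needed ...
*/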
3533 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3534 {
3535   VecScatter_MPI_General *gen_to,*gen_from;
3536   PetscErrorCode         ierr;
3537   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3538   Mat_SeqAIJ             *b_oth;
3539   VecScatter             ctx=a->Mvctx;
3540   MPI_Comm               comm=ctx->comm;
3541   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3542   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3543   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3544   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3545   MPI_Request            *rwaits,*swaits;
3546   MPI_Status             *sstatus,rstatus;
3547   PetscInt               *cols;
3548   PetscScalar            *vals;
3549   PetscMPIInt            j;
3550 
3551   PetscFunctionBegin;
3552   if (a->cstart != b->rstart || a->cend != b->rend){
3553     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3554   }
3555   if (!logkey_GetBrowsOfAocols) {
3556     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3557   }
3558   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3559   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3560 
3561   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3562   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3563   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3564   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3565   nrecvs   = gen_from->n;
3566   nsends   = gen_to->n;
3567   rwaits   = gen_from->requests;
3568   swaits   = gen_to->requests;
3569   srow     = gen_to->indices;   /* local row index to be sent */
3570   rstarts  = gen_from->starts;
3571   sstarts  = gen_to->starts;
3572   rprocs   = gen_from->procs;
3573   sprocs   = gen_to->procs;
3574   sstatus  = gen_to->sstatus;
3575 
3576   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3577   if (scall == MAT_INITIAL_MATRIX){
3578     /* i-array */
3579     /*---------*/
3580     /*  post receives */
3581     for (i=0; i<nrecvs; i++){
3582       rowlen = (PetscInt*)rvalues + rstarts[i];
3583       nrows = rstarts[i+1]-rstarts[i];
3584       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3585     }
3586 
3587     /* pack the outgoing message */
3588     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3589     rstartsj = sstartsj + nsends +1;
3590     sstartsj[0] = 0;  rstartsj[0] = 0;
3591     len = 0; /* total length of j or a array to be sent */
3592     k = 0;
3593     for (i=0; i<nsends; i++){
3594       rowlen = (PetscInt*)svalues + sstarts[i];
3595       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3596       for (j=0; j<nrows; j++) {
3597         row = srow[k] + b->rowners[rank]; /* global row idx */
3598         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3599         len += rowlen[j];
3600         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3601         k++;
3602       }
3603       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3604       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3605     }
3606     /* recvs and sends of i-array are completed */
3607     i = nrecvs;
3608     while (i--) {
3609       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3610     }
3611     if (nsends) {
3612       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3613     }
3614     /* allocate buffers for sending j and a arrays */
3615     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3616     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3617 
3618     /* create i-array of B_oth */
3619     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3620     b_othi[0] = 0;
3621     len = 0; /* total length of j or a array to be received */
3622     k = 0;
3623     for (i=0; i<nrecvs; i++){
3624       rowlen = (PetscInt*)rvalues + rstarts[i];
3625       nrows = rstarts[i+1]-rstarts[i];
3626       for (j=0; j<nrows; j++) {
3627         b_othi[k+1] = b_othi[k] + rowlen[j];
3628         len += rowlen[j]; k++;
3629       }
3630       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3631     }
3632 
3633     /* allocate space for j and a arrays of B_oth */
3634     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3635     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3636 
3637     /* j-array */
3638     /*---------*/
3639     /*  post receives of j-array */
3640     for (i=0; i<nrecvs; i++){
3641       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3642       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3643     }
3644     k = 0;
3645     for (i=0; i<nsends; i++){
3646       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3647       bufJ = bufj+sstartsj[i];
3648       for (j=0; j<nrows; j++) {
3649         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3650         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3651         for (l=0; l<ncols; l++){
3652           *bufJ++ = cols[l];
3653         }
3654         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3655       }
3656       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3657     }
3658 
3659     /* recvs and sends of j-array are completed */
3660     i = nrecvs;
3661     while (i--) {
3662       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3663     }
3664     if (nsends) {
3665       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3666     }
3667   } else if (scall == MAT_REUSE_MATRIX){
3668     sstartsj = *startsj;
3669     rstartsj = sstartsj + nsends +1;
3670     bufa     = *bufa_ptr;
3671     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3672     b_otha   = b_oth->a;
3673   } else {
3674     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3675   }
3676 
3677   /* a-array */
3678   /*---------*/
3679   /*  post receives of a-array */
3680   for (i=0; i<nrecvs; i++){
3681     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3682     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3683   }
3684   k = 0;
3685   for (i=0; i<nsends; i++){
3686     nrows = sstarts[i+1]-sstarts[i];
3687     bufA = bufa+sstartsj[i];
3688     for (j=0; j<nrows; j++) {
3689       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3690       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3691       for (l=0; l<ncols; l++){
3692         *bufA++ = vals[l];
3693       }
3694       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3695 
3696     }
3697     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3698   }
3699   /* recvs and sends of a-array are completed */
3700   i = nrecvs;
3701   while (i--) {
3702     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3703   }
3704   if (nsends) {
3705     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3706   }
3707 
3708   if (scall == MAT_INITIAL_MATRIX){
3709     /* put together the new matrix */
3710     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3711 
3712     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3713     /* Since these are PETSc arrays, change flags to free them as necessary. */
3714     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3715     b_oth->freedata = PETSC_TRUE;
3716     b_oth->nonew    = 0;
3717 
3718     ierr = PetscFree(bufj);CHKERRQ(ierr);
3719     if (!startsj || !bufa_ptr){
3720       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3721       ierr = PetscFree(bufa);CHKERRQ(ierr);
3722     } else {
3723       *startsj  = sstartsj;
3724       *bufa_ptr = bufa;
3725     }
3726   }
3727   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3728 
3729   PetscFunctionReturn(0);
3730 }
3731 
3732 /*MC
3733    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3734 
3735    Options Database Keys:
3736 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3737 
3738   Level: beginner
3739 
3740 .seealso: MatCreateMPIAIJ
3741 M*/
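/*
   Editor's note: a minimal creation sketch, not part of the original source; the
   preallocation numbers (5 and 2) are arbitrary examples.

      Mat C;
      ierr = MatCreate(comm,m,n,M,N,&C);CHKERRQ(ierr);
      ierr = MatSetType(C,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(C,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/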
3742 
3743 EXTERN_C_BEGIN
3744 #undef __FUNCT__
3745 #define __FUNCT__ "MatCreate_MPIAIJ"
3746 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3747 {
3748   Mat_MPIAIJ     *b;
3749   PetscErrorCode ierr;
3750   PetscInt       i;
3751   PetscMPIInt    size;
3752 
3753   PetscFunctionBegin;
3754   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3755 
3756   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3757   B->data         = (void*)b;
3758   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3759   B->factor       = 0;
3760   B->bs           = 1;
3761   B->assembled    = PETSC_FALSE;
3762   B->mapping      = 0;
3763 
3764   B->insertmode      = NOT_SET_VALUES;
3765   b->size            = size;
3766   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3767 
3768   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3769   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3770 
3771   /* the information in the maps duplicates the information computed below, eventually
3772      we should remove the duplicate information that is not contained in the maps */
3773   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3774   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3775 
3776   /* build local table of row and column ownerships */
3777   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3778   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3779   b->cowners = b->rowners + b->size + 2;
3780   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3781   b->rowners[0] = 0;
3782   for (i=2; i<=b->size; i++) {
3783     b->rowners[i] += b->rowners[i-1];
3784   }
3785   b->rstart = b->rowners[b->rank];
3786   b->rend   = b->rowners[b->rank+1];
3787   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3788   b->cowners[0] = 0;
3789   for (i=2; i<=b->size; i++) {
3790     b->cowners[i] += b->cowners[i-1];
3791   }
3792   b->cstart = b->cowners[b->rank];
3793   b->cend   = b->cowners[b->rank+1];
3794 
3795   /* build cache for off array entries formed */
3796   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3797   b->donotstash  = PETSC_FALSE;
3798   b->colmap      = 0;
3799   b->garray      = 0;
3800   b->roworiented = PETSC_TRUE;
3801 
3802   /* stuff used for matrix vector multiply */
3803   b->lvec      = PETSC_NULL;
3804   b->Mvctx     = PETSC_NULL;
3805 
3806   /* stuff for MatGetRow() */
3807   b->rowindices   = 0;
3808   b->rowvalues    = 0;
3809   b->getrowactive = PETSC_FALSE;
3810 
3811   /* Explicitly create 2 MATSEQAIJ matrices. */
3812   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3813   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3814   PetscLogObjectParent(B,b->A);
3815   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3816   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3817   PetscLogObjectParent(B,b->B);
3818 
3819   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3820                                      "MatStoreValues_MPIAIJ",
3821                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3822   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3823                                      "MatRetrieveValues_MPIAIJ",
3824                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3825   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3826 				     "MatGetDiagonalBlock_MPIAIJ",
3827                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3828   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3829 				     "MatIsTranspose_MPIAIJ",
3830 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3831   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3832 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3833 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3834   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3835 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3836 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3837   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3838 				     "MatDiagonalScaleLocal_MPIAIJ",
3839 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3840   PetscFunctionReturn(0);
3841 }
3842 EXTERN_C_END
3843