xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c71e6ed79470f80f46156e2e6d79aab3dd836ca2)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9 a slightly higher hash table cost; without it, it is not scalable (each processor
10 has an order-N integer array) but it is fast to access.
11 */
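/*
   Illustrative example for the non-CTABLE branch below: if the off-diagonal
   block B references the global columns garray[] = {3,7,12}, then after this
   routine colmap[3] = 1, colmap[7] = 2, colmap[12] = 3 and every other entry
   is 0; callers subtract 1, so a result of -1 means "global column not
   present in B".
*/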
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                    ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
58       if (nonew == 1) goto a_noinsert; \
59       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
60       if (nrow >= rmax) { \
61         /* there is no extra room in row, therefore enlarge */ \
62         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
63         PetscScalar *new_a; \
64  \
65         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
66  \
67         /* malloc new storage space */ \
68         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
69         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
70         new_j   = (PetscInt*)(new_a + new_nz); \
71         new_i   = new_j + new_nz; \
72  \
73         /* copy over old data into new slots */ \
74         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
75         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
76         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
77         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
78         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
79                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
81         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
82                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
83         /* free up old matrix storage */ \
84  \
85         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
86         if (!a->singlemalloc) { \
87            ierr = PetscFree(a->i);CHKERRQ(ierr); \
88            ierr = PetscFree(a->j);CHKERRQ(ierr); \
89         } \
90         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
91         a->singlemalloc = PETSC_TRUE; \
92  \
93         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
94         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
95         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
96         a->maxnz += CHUNKSIZE; \
97         a->reallocs++; \
98       } \
99       N = nrow++ - 1; a->nz++; \
100       /* shift up all the later entries in this row */ \
101       for (ii=N; ii>=_i; ii--) { \
102         rp[ii+1] = rp[ii]; \
103         ap[ii+1] = ap[ii]; \
104       } \
105       rp[_i] = col1;  \
106       ap[_i] = value;  \
107       a_noinsert: ; \
108       ailen[row] = nrow; \
109 }
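/*
   Note: the macro above (and its twin for the B block below) inserts a single
   value into the local SeqAIJ storage.  It binary-searches the row until at
   most 5 candidate positions remain, finishes with a linear scan, and either
   updates the entry in place or shifts the later entries up to make room.  If
   the row is already full (nrow >= rmax), the whole a/i/j storage is
   reallocated with CHUNKSIZE extra slots for that row; correct preallocation
   avoids exactly this PetscMalloc/PetscMemcpy traffic.
*/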
110 
111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112 { \
113  \
114     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115     rmax = bimax[row]; nrow = bilen[row];  \
116     col1 = col - shift; \
117      \
118     low = 0; high = nrow; \
119     while (high-low > 5) { \
120       t = (low+high)/2; \
121       if (rp[t] > col) high = t; \
122       else             low  = t; \
123     } \
124        for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > col1) break; \
126         if (rp[_i] == col1) { \
127           if (addv == ADD_VALUES) ap[_i] += value;   \
128           else                    ap[_i] = value; \
129           goto b_noinsert; \
130         } \
131       }  \
132       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
133       if (nonew == 1) goto b_noinsert; \
134       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
135       if (nrow >= rmax) { \
136         /* there is no extra room in row, therefore enlarge */ \
137         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
138         PetscScalar *new_a; \
139  \
140         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
141  \
142         /* malloc new storage space */ \
143         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
144         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
145         new_j   = (PetscInt*)(new_a + new_nz); \
146         new_i   = new_j + new_nz; \
147  \
148         /* copy over old data into new slots */ \
149         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
150         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
151         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
152         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
153         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
154                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
155         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
156         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
157                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
158         /* free up old matrix storage */ \
159  \
160         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
161         if (!b->singlemalloc) { \
162           ierr = PetscFree(b->i);CHKERRQ(ierr); \
163           ierr = PetscFree(b->j);CHKERRQ(ierr); \
164         } \
165         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
166         b->singlemalloc = PETSC_TRUE; \
167  \
168         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
169         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
170         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
171         b->maxnz += CHUNKSIZE; \
172         b->reallocs++; \
173       } \
174       N = nrow++ - 1; b->nz++; \
175       /* shift up all the later entries in this row */ \
176       for (ii=N; ii>=_i; ii--) { \
177         rp[ii+1] = rp[ii]; \
178         ap[ii+1] = ap[ii]; \
179       } \
180       rp[_i] = col1;  \
181       ap[_i] = value;  \
182       b_noinsert: ; \
183       bilen[row] = nrow; \
184 }
185 
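/*
   Note: MatSetValues_MPIAIJ() routes each entry (im[i],in[j]) to one of three
   destinations: the diagonal block aij->A when both row and column are owned
   locally, the off-diagonal block aij->B when only the row is owned locally,
   or the stash when the row belongs to another process (stashed entries are
   communicated during MatAssemblyBegin/End).  A minimal usage sketch, assuming
   a MATMPIAIJ matrix A has already been created and preallocated:

     PetscInt    row = 0, col = 0;
     PetscScalar one = 1.0;
     ierr = MatSetValues(A,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/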
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIAIJ"
188 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
191   PetscScalar    value;
192   PetscErrorCode ierr;
193   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
194   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
195   PetscTruth     roworiented = aij->roworiented;
196 
197   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros above */
198   Mat            A = aij->A;
199   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
200   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
201   PetscScalar    *aa = a->a;
202   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
203   Mat            B = aij->B;
204   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
205   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
206   PetscScalar    *ba = b->a;
207 
208   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
209   PetscInt       nonew = a->nonew,shift=0;
210   PetscScalar    *ap;
211 
212   PetscFunctionBegin;
213   for (i=0; i<m; i++) {
214     if (im[i] < 0) continue;
215 #if defined(PETSC_USE_DEBUG)
216     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
217 #endif
218     if (im[i] >= rstart && im[i] < rend) {
219       row = im[i] - rstart;
220       for (j=0; j<n; j++) {
221         if (in[j] >= cstart && in[j] < cend){
222           col = in[j] - cstart;
223           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
224           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
225           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
226           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
227         } else if (in[j] < 0) continue;
228 #if defined(PETSC_USE_DEBUG)
229         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
230 #endif
231         else {
232           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
233           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
234           if (mat->was_assembled) {
235             if (!aij->colmap) {
236               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined (PETSC_USE_CTABLE)
239             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
240 	    col--;
241 #else
242             col = aij->colmap[in[j]] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
245               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
246               col =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
248               B = aij->B;
249               b = (Mat_SeqAIJ*)B->data;
250               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
251               ba = b->a;
252             }
253           } else col = in[j];
254           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (!aij->donotstash) {
260         if (roworiented) {
261           if (ignorezeroentries && v[i*n] == 0.0) continue;
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
263         } else {
264           if (ignorezeroentries && v[i] == 0.0) continue;
265           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
266         }
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatGetValues_MPIAIJ"
275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
276 {
277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
278   PetscErrorCode ierr;
279   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
280   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
281 
282   PetscFunctionBegin;
283   for (i=0; i<m; i++) {
284     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
285     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
286     if (idxm[i] >= rstart && idxm[i] < rend) {
287       row = idxm[i] - rstart;
288       for (j=0; j<n; j++) {
289         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
290         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
291         if (idxn[j] >= cstart && idxn[j] < cend){
292           col = idxn[j] - cstart;
293           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
294         } else {
295           if (!aij->colmap) {
296             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
297           }
298 #if defined (PETSC_USE_CTABLE)
299           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
300           col --;
301 #else
302           col = aij->colmap[idxn[j]] - 1;
303 #endif
304           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
305           else {
306             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
307           }
308         }
309       }
310     } else {
311       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
312     }
313   }
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
320 {
321   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
322   PetscErrorCode ierr;
323   PetscInt       nstash,reallocs;
324   InsertMode     addv;
325 
326   PetscFunctionBegin;
327   if (aij->donotstash) {
328     PetscFunctionReturn(0);
329   }
330 
331   /* make sure all processors are in the same mode: either INSERT_VALUES or ADD_VALUES */
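  /*
     Note: the bitwise-OR reduction detects mixed modes because (assuming the
     usual PETSc values INSERT_VALUES = 1 and ADD_VALUES = 2) a mixture reduces
     to ADD_VALUES|INSERT_VALUES = 3, which is rejected below; a process that
     set no values contributes 0 and simply adopts the reduced mode afterwards.
  */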
332   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
333   if (addv == (ADD_VALUES|INSERT_VALUES)) {
334     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
335   }
336   mat->insertmode = addv; /* in case this processor had no cache */
337 
338   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
339   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
340   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNCT__
345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
347 {
348   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
349   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
350   PetscErrorCode ierr;
351   PetscMPIInt    n;
352   PetscInt       i,j,rstart,ncols,flg;
353   PetscInt       *row,*col,other_disassembled;
354   PetscScalar    *val;
355   InsertMode     addv = mat->insertmode;
356 
357   PetscFunctionBegin;
358   if (!aij->donotstash) {
359     while (1) {
360       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
361       if (!flg) break;
362 
363       for (i=0; i<n;) {
364         /* Now identify the consecutive vals belonging to the same row */
365         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
366         if (j < n) ncols = j-i;
367         else       ncols = n-i;
368         /* Now assemble all these values with a single function call */
369         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
370         i = j;
371       }
372     }
373     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
374   }
375   a->compressedrow.use     = PETSC_FALSE;
376   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
377   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
378 
379   /* determine if any processor has disassembled; if so, we must
380      also disassemble ourselves so that we may reassemble. */
381   /*
382      if the nonzero structure of submatrix B cannot change, then we know that
383      no processor disassembled, so we can skip this step
384   */
385   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
386     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
387     if (mat->was_assembled && !other_disassembled) {
388       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
389       /* reaccess the b because aij->B was changed */
390       b    = (Mat_SeqAIJ *)aij->B->data;
391     }
392   }
393 
394   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
395     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
396   }
397 
398   b->inode.use         = PETSC_FALSE;
399   b->compressedrow.use = PETSC_TRUE;
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 
408   /* used by MatAXPY() */
409   a->xtoy = 0; b->xtoy = 0;
410   a->XtoY = 0; b->XtoY = 0;
411 
412   PetscFunctionReturn(0);
413 }
414 
415 #undef __FUNCT__
416 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
417 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
418 {
419   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
420   PetscErrorCode ierr;
421 
422   PetscFunctionBegin;
423   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
424   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
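/*
   Note: MatZeroRows_MPIAIJ() first determines, for every global row index in
   `is`, which process owns it; non-local indices are sent to their owners with
   nonblocking MPI sends/receives (PetscMaxSum() tells each process how many
   messages to expect and their maximum length).  The locally owned indices,
   whether received or local, are converted to local numbering and the rows are
   then zeroed in the sequential blocks, l->B first and then l->A, with
   optional placement of the diagonal value.
*/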
428 #undef __FUNCT__
429 #define __FUNCT__ "MatZeroRows_MPIAIJ"
430 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
431 {
432   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
433   PetscErrorCode ierr;
434   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
435   PetscInt       i,N,*rows,*owners = l->rowners;
436   PetscInt       *nprocs,j,idx,nsends,row;
437   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
438   PetscInt       *rvalues,count,base,slen,*source;
439   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
440   MPI_Comm       comm = A->comm;
441   MPI_Request    *send_waits,*recv_waits;
442   MPI_Status     recv_status,*send_status;
443   IS             istmp;
444 #if defined(PETSC_DEBUG)
445   PetscTruth     found = PETSC_FALSE;
446 #endif
447 
448   PetscFunctionBegin;
449   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
450   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
451 
452   /*  first count number of contributors to each processor */
453   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
454   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
455   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
456   j = 0;
457   for (i=0; i<N; i++) {
458     if (lastidx > (idx = rows[i])) j = 0;
459     lastidx = idx;
460     for (; j<size; j++) {
461       if (idx >= owners[j] && idx < owners[j+1]) {
462         nprocs[2*j]++;
463         nprocs[2*j+1] = 1;
464         owner[i] = j;
465 #if defined(PETSC_DEBUG)
466         found = PETSC_TRUE;
467 #endif
468         break;
469       }
470     }
471 #if defined(PETSC_DEBUG)
472     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
473     found = PETSC_FALSE;
474 #endif
475   }
476   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
477 
478   /* inform other processors of number of messages and max length*/
479   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
480 
481   /* post receives:   */
482   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
483   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
484   for (i=0; i<nrecvs; i++) {
485     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
486   }
487 
488   /* do sends:
489       1) starts[i] gives the starting index in svalues for stuff going to
490          the ith processor
491   */
492   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
493   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
494   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
495   starts[0] = 0;
496   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
497   for (i=0; i<N; i++) {
498     svalues[starts[owner[i]]++] = rows[i];
499   }
500   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
501 
502   starts[0] = 0;
503   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
504   count = 0;
505   for (i=0; i<size; i++) {
506     if (nprocs[2*i+1]) {
507       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
508     }
509   }
510   ierr = PetscFree(starts);CHKERRQ(ierr);
511 
512   base = owners[rank];
513 
514   /*  wait on receives */
515   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
516   source = lens + nrecvs;
517   count  = nrecvs; slen = 0;
518   while (count) {
519     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
520     /* unpack receives into our local space */
521     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
522     source[imdex]  = recv_status.MPI_SOURCE;
523     lens[imdex]    = n;
524     slen          += n;
525     count--;
526   }
527   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
528 
529   /* move the received data into the list of local rows to zero */
530   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
531   count = 0;
532   for (i=0; i<nrecvs; i++) {
533     values = rvalues + i*nmax;
534     for (j=0; j<lens[i]; j++) {
535       lrows[count++] = values[j] - base;
536     }
537   }
538   ierr = PetscFree(rvalues);CHKERRQ(ierr);
539   ierr = PetscFree(lens);CHKERRQ(ierr);
540   ierr = PetscFree(owner);CHKERRQ(ierr);
541   ierr = PetscFree(nprocs);CHKERRQ(ierr);
542 
543   /* actually zap the local rows */
544   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
545   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
546 
547   /*
548         Zero the required rows. If the "diagonal block" of the matrix
549      is square and the user wishes to set the diagonal, we use separate
550      code so that MatSetValues() is not called for each diagonal entry, which
551      would allocate new memory, triggering many mallocs and slowing things down.
552 
553        Contributed by: Matthew Knepley
554   */
555   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
556   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
557   if (diag && (l->A->M == l->A->N)) {
558     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
559   } else if (diag) {
560     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
561     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
562       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
563 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
564     }
565     for (i = 0; i < slen; i++) {
566       row  = lrows[i] + rstart;
567       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
568     }
569     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
570     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
571   } else {
572     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
573   }
574   ierr = ISDestroy(istmp);CHKERRQ(ierr);
575   ierr = PetscFree(lrows);CHKERRQ(ierr);
576 
577   /* wait on sends */
578   if (nsends) {
579     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
580     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
581     ierr = PetscFree(send_status);CHKERRQ(ierr);
582   }
583   ierr = PetscFree(send_waits);CHKERRQ(ierr);
584   ierr = PetscFree(svalues);CHKERRQ(ierr);
585 
586   PetscFunctionReturn(0);
587 }
588 
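/*
   Note: the parallel multiply overlaps communication with computation, forming

        yy = a->A * xx  +  a->B * lvec

   where lvec holds the ghost values of xx gathered by the VecScatter a->Mvctx;
   the scatter is started before, and completed after, the purely local product
   with a->A.
*/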
589 #undef __FUNCT__
590 #define __FUNCT__ "MatMult_MPIAIJ"
591 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
592 {
593   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
594   PetscErrorCode ierr;
595   PetscInt       nt;
596 
597   PetscFunctionBegin;
598   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
599   if (nt != A->n) {
600     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
601   }
602   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
603   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
604   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
605   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
606   PetscFunctionReturn(0);
607 }
608 
609 #undef __FUNCT__
610 #define __FUNCT__ "MatMultAdd_MPIAIJ"
611 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
612 {
613   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
614   PetscErrorCode ierr;
615 
616   PetscFunctionBegin;
617   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
618   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
619   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
620   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
621   PetscFunctionReturn(0);
622 }
623 
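/*
   Note: the transpose multiply reverses the communication pattern of MatMult:
   a->A^T * xx is computed locally into yy, while a->B^T * xx is placed in lvec
   and scattered back (SCATTER_REVERSE with ADD_VALUES) into the owning
   processes' parts of yy; whether the local product comes before or after
   starting the scatter depends on whether the scatter begin/end are merged.
*/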
624 #undef __FUNCT__
625 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
626 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
627 {
628   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
629   PetscErrorCode ierr;
630   PetscTruth     merged;
631 
632   PetscFunctionBegin;
633   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
634   /* do nondiagonal part */
635   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
636   if (!merged) {
637     /* send it on its way */
638     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
639     /* do local part */
640     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
641     /* receive remote parts: note this assumes the values are not actually */
642     /* added into yy until the VecScatterEnd() on the next line */
643     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   } else {
645     /* do local part */
646     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
647     /* send it on its way */
648     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
649     /* the values were actually received in the VecScatterBegin() above, but we still must call this (here a no-op) */
650     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
651   }
652   PetscFunctionReturn(0);
653 }
654 
655 EXTERN_C_BEGIN
656 #undef __FUNCT__
657 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
658 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
659 {
660   MPI_Comm       comm;
661   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
662   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
663   IS             Me,Notme;
664   PetscErrorCode ierr;
665   PetscInt       M,N,first,last,*notme,i;
666   PetscMPIInt    size;
667 
668   PetscFunctionBegin;
669 
670   /* Easy test: symmetric diagonal block */
671   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
672   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
673   if (!*f) PetscFunctionReturn(0);
674   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
675   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
676   if (size == 1) PetscFunctionReturn(0);
677 
678   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
679   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
680   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
681   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
682   for (i=0; i<first; i++) notme[i] = i;
683   for (i=last; i<M; i++) notme[i-last+first] = i;
684   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
685   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
686   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
687   Aoff = Aoffs[0];
688   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
689   Boff = Boffs[0];
690   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
691   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
692   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
693   ierr = ISDestroy(Me);CHKERRQ(ierr);
694   ierr = ISDestroy(Notme);CHKERRQ(ierr);
695 
696   PetscFunctionReturn(0);
697 }
698 EXTERN_C_END
699 
700 #undef __FUNCT__
701 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
702 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
703 {
704   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
705   PetscErrorCode ierr;
706 
707   PetscFunctionBegin;
708   /* do nondiagonal part */
709   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
710   /* send it on its way */
711   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
712   /* do local part */
713   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
714   /* receive remote parts */
715   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 
719 /*
720   This only works correctly for square matrices where the subblock A->A is the
721    diagonal block
722 */
723 #undef __FUNCT__
724 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
725 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
726 {
727   PetscErrorCode ierr;
728   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
729 
730   PetscFunctionBegin;
731   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
732   if (a->rstart != a->cstart || a->rend != a->cend) {
733     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
734   }
735   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatScale_MPIAIJ"
741 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
742 {
743   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
744   PetscErrorCode ierr;
745 
746   PetscFunctionBegin;
747   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
748   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
749   PetscFunctionReturn(0);
750 }
751 
752 #undef __FUNCT__
753 #define __FUNCT__ "MatDestroy_MPIAIJ"
754 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
755 {
756   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
757   PetscErrorCode ierr;
758 
759   PetscFunctionBegin;
760 #if defined(PETSC_USE_LOG)
761   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
762 #endif
763   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
764   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
765   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
766   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
767 #if defined (PETSC_USE_CTABLE)
768   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
769 #else
770   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
771 #endif
772   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
773   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
774   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
775   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
776   ierr = PetscFree(aij);CHKERRQ(ierr);
777 
778   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
779   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
780   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
781   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
782   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
783   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
784   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 
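/*
   Note: the routine below writes the standard PETSc binary AIJ layout,
   assembled here by hand: a 4-entry header (MAT_FILE_COOKIE, M, N, global
   nonzero count), then all row lengths, then all global column indices, then
   all values; process 0 receives each other process's piece in turn and writes
   it with PetscBinaryWrite().
*/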
788 #undef __FUNCT__
789 #define __FUNCT__ "MatView_MPIAIJ_Binary"
790 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
791 {
792   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
793   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
794   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
795   PetscErrorCode    ierr;
796   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
797   int               fd;
798   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
799   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
800   PetscScalar       *column_values;
801 
802   PetscFunctionBegin;
803   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
804   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
805   nz   = A->nz + B->nz;
806   if (!rank) {
807     header[0] = MAT_FILE_COOKIE;
808     header[1] = mat->M;
809     header[2] = mat->N;
810     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
811     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
812     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
813     /* get largest number of rows any processor has */
814     rlen = mat->m;
815     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
816     for (i=1; i<size; i++) {
817       rlen = PetscMax(rlen,range[i+1] - range[i]);
818     }
819   } else {
820     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
821     rlen = mat->m;
822   }
823 
824   /* load up the local row counts */
825   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
826   for (i=0; i<mat->m; i++) {
827     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
828   }
829 
830   /* store the row lengths to the file */
831   if (!rank) {
832     MPI_Status status;
833     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
834     for (i=1; i<size; i++) {
835       rlen = range[i+1] - range[i];
836       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
837       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
838     }
839   } else {
840     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
841   }
842   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
843 
844   /* load up the local column indices */
845   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
846   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
847   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
848   cnt  = 0;
849   for (i=0; i<mat->m; i++) {
850     for (j=B->i[i]; j<B->i[i+1]; j++) {
851       if ( (col = garray[B->j[j]]) > cstart) break;
852       column_indices[cnt++] = col;
853     }
854     for (k=A->i[i]; k<A->i[i+1]; k++) {
855       column_indices[cnt++] = A->j[k] + cstart;
856     }
857     for (; j<B->i[i+1]; j++) {
858       column_indices[cnt++] = garray[B->j[j]];
859     }
860   }
861   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
862 
863   /* store the column indices to the file */
864   if (!rank) {
865     MPI_Status status;
866     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
867     for (i=1; i<size; i++) {
868       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
869       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
870       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
871       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
872     }
873   } else {
874     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
875     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
876   }
877   ierr = PetscFree(column_indices);CHKERRQ(ierr);
878 
879   /* load up the local column values */
880   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
881   cnt  = 0;
882   for (i=0; i<mat->m; i++) {
883     for (j=B->i[i]; j<B->i[i+1]; j++) {
884       if ( garray[B->j[j]] > cstart) break;
885       column_values[cnt++] = B->a[j];
886     }
887     for (k=A->i[i]; k<A->i[i+1]; k++) {
888       column_values[cnt++] = A->a[k];
889     }
890     for (; j<B->i[i+1]; j++) {
891       column_values[cnt++] = B->a[j];
892     }
893   }
894   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
895 
896   /* store the column values to the file */
897   if (!rank) {
898     MPI_Status status;
899     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
900     for (i=1; i<size; i++) {
901       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
902       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
903       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
904       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
905     }
906   } else {
907     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
908     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
909   }
910   ierr = PetscFree(column_values);CHKERRQ(ierr);
911   PetscFunctionReturn(0);
912 }
913 
914 #undef __FUNCT__
915 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
916 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
917 {
918   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
919   PetscErrorCode    ierr;
920   PetscMPIInt       rank = aij->rank,size = aij->size;
921   PetscTruth        isdraw,iascii,isbinary;
922   PetscViewer       sviewer;
923   PetscViewerFormat format;
924 
925   PetscFunctionBegin;
926   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
927   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
928   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
929   if (iascii) {
930     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
931     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
932       MatInfo    info;
933       Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
934       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
935       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
936       if (!a->inode.size) {
937         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
938 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
939       } else {
940         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
941 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
942       }
943       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
944       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
945       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
946       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
947       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
948       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
949       PetscFunctionReturn(0);
950     } else if (format == PETSC_VIEWER_ASCII_INFO) {
951       Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
952       if (a->inode.size) {
953         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",a->inode.node_count,a->inode.limit);CHKERRQ(ierr);
954       } else {
955         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
956       }
957       PetscFunctionReturn(0);
958     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
959       PetscFunctionReturn(0);
960     }
961   } else if (isbinary) {
962     if (size == 1) {
963       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
964       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
965     } else {
966       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
967     }
968     PetscFunctionReturn(0);
969   } else if (isdraw) {
970     PetscDraw  draw;
971     PetscTruth isnull;
972     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
973     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
974   }
975 
976   if (size == 1) {
977     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
978     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
979   } else {
980     /* assemble the entire matrix onto first processor. */
981     Mat         A;
982     Mat_SeqAIJ  *Aloc;
983     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
984     PetscScalar *a;
985 
986     if (!rank) {
987       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
988     } else {
989       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
990     }
991     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
992     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
993     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
994     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
995 
996     /* copy over the A part */
997     Aloc = (Mat_SeqAIJ*)aij->A->data;
998     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
999     row = aij->rstart;
1000     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
1001     for (i=0; i<m; i++) {
1002       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1003       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1004     }
1005     aj = Aloc->j;
1006     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
1007 
1008     /* copy over the B part */
1009     Aloc = (Mat_SeqAIJ*)aij->B->data;
1010     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1011     row  = aij->rstart;
1012     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1013     ct   = cols;
1014     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1015     for (i=0; i<m; i++) {
1016       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1017       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1018     }
1019     ierr = PetscFree(ct);CHKERRQ(ierr);
1020     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1021     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1022     /*
1023       Everyone has to participate in drawing the matrix since the graphics waits are
1024        synchronized across all processors that share the PetscDraw object
1025     */
1026     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1027     if (!rank) {
1028       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1029       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1030     }
1031     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1032     ierr = MatDestroy(A);CHKERRQ(ierr);
1033   }
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatView_MPIAIJ"
1039 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1040 {
1041   PetscErrorCode ierr;
1042   PetscTruth     iascii,isdraw,issocket,isbinary;
1043 
1044   PetscFunctionBegin;
1045   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1046   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1047   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1048   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1049   if (iascii || isdraw || isbinary || issocket) {
1050     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1051   } else {
1052     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1053   }
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 
1058 
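/*
   Note: the "local" SOR variants below iterate

        bb1 = bb - B*lvec        (lvec holds the ghost values of xx)
        xx  = relax(A, bb1, ...) (forward, backward, or symmetric sweep on the
                                  diagonal block only)

   so each outer iteration refreshes the ghost values exactly once; a true SOR
   sweep across process boundaries is not implemented and the final else
   rejects it.
*/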
1059 #undef __FUNCT__
1060 #define __FUNCT__ "MatRelax_MPIAIJ"
1061 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1062 {
1063   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1064   PetscErrorCode ierr;
1065   Vec            bb1;
1066   PetscScalar    mone=-1.0;
1067 
1068   PetscFunctionBegin;
1069   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1070 
1071   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1072 
1073   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1074     if (flag & SOR_ZERO_INITIAL_GUESS) {
1075       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1076       its--;
1077     }
1078 
1079     while (its--) {
1080       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1081       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1082 
1083       /* update rhs: bb1 = bb - B*x */
1084       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1085       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1086 
1087       /* local sweep */
1088       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1089       CHKERRQ(ierr);
1090     }
1091   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1092     if (flag & SOR_ZERO_INITIAL_GUESS) {
1093       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1094       its--;
1095     }
1096     while (its--) {
1097       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1098       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1099 
1100       /* update rhs: bb1 = bb - B*x */
1101       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1102       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1103 
1104       /* local sweep */
1105       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1106       CHKERRQ(ierr);
1107     }
1108   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1109     if (flag & SOR_ZERO_INITIAL_GUESS) {
1110       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1111       its--;
1112     }
1113     while (its--) {
1114       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1115       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1116 
1117       /* update rhs: bb1 = bb - B*x */
1118       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1119       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1120 
1121       /* local sweep */
1122       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1123       CHKERRQ(ierr);
1124     }
1125   } else {
1126     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1127   }
1128 
1129   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1135 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1136 {
1137   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1138   Mat            A = mat->A,B = mat->B;
1139   PetscErrorCode ierr;
1140   PetscReal      isend[5],irecv[5];
1141 
1142   PetscFunctionBegin;
1143   info->block_size     = 1.0;
1144   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1145   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1146   isend[3] = info->memory;  isend[4] = info->mallocs;
1147   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1148   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1149   isend[3] += info->memory;  isend[4] += info->mallocs;
1150   if (flag == MAT_LOCAL) {
1151     info->nz_used      = isend[0];
1152     info->nz_allocated = isend[1];
1153     info->nz_unneeded  = isend[2];
1154     info->memory       = isend[3];
1155     info->mallocs      = isend[4];
1156   } else if (flag == MAT_GLOBAL_MAX) {
1157     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1158     info->nz_used      = irecv[0];
1159     info->nz_allocated = irecv[1];
1160     info->nz_unneeded  = irecv[2];
1161     info->memory       = irecv[3];
1162     info->mallocs      = irecv[4];
1163   } else if (flag == MAT_GLOBAL_SUM) {
1164     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1165     info->nz_used      = irecv[0];
1166     info->nz_allocated = irecv[1];
1167     info->nz_unneeded  = irecv[2];
1168     info->memory       = irecv[3];
1169     info->mallocs      = irecv[4];
1170   }
1171   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1172   info->fill_ratio_needed = 0;
1173   info->factor_mallocs    = 0;
1174   info->rows_global       = (double)matin->M;
1175   info->columns_global    = (double)matin->N;
1176   info->rows_local        = (double)matin->m;
1177   info->columns_local     = (double)matin->N;
1178 
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatSetOption_MPIAIJ"
1184 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1185 {
1186   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1187   PetscErrorCode ierr;
1188 
1189   PetscFunctionBegin;
1190   switch (op) {
1191   case MAT_NO_NEW_NONZERO_LOCATIONS:
1192   case MAT_YES_NEW_NONZERO_LOCATIONS:
1193   case MAT_COLUMNS_UNSORTED:
1194   case MAT_COLUMNS_SORTED:
1195   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1196   case MAT_KEEP_ZEROED_ROWS:
1197   case MAT_NEW_NONZERO_LOCATION_ERR:
1198   case MAT_USE_INODES:
1199   case MAT_DO_NOT_USE_INODES:
1200   case MAT_IGNORE_ZERO_ENTRIES:
1201     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1202     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1203     break;
1204   case MAT_ROW_ORIENTED:
1205     a->roworiented = PETSC_TRUE;
1206     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1207     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1208     break;
1209   case MAT_ROWS_SORTED:
1210   case MAT_ROWS_UNSORTED:
1211   case MAT_YES_NEW_DIAGONALS:
1212     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1213     break;
1214   case MAT_COLUMN_ORIENTED:
1215     a->roworiented = PETSC_FALSE;
1216     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1217     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1218     break;
1219   case MAT_IGNORE_OFF_PROC_ENTRIES:
1220     a->donotstash = PETSC_TRUE;
1221     break;
1222   case MAT_NO_NEW_DIAGONALS:
1223     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1224   case MAT_SYMMETRIC:
1225   case MAT_STRUCTURALLY_SYMMETRIC:
1226   case MAT_HERMITIAN:
1227   case MAT_SYMMETRY_ETERNAL:
1228     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1229     break;
1230   case MAT_NOT_SYMMETRIC:
1231   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1232   case MAT_NOT_HERMITIAN:
1233   case MAT_NOT_SYMMETRY_ETERNAL:
1234     break;
1235   default:
1236     SETERRQ(PETSC_ERR_SUP,"unknown option");
1237   }
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
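/*
   Note: MatGetRow on the parallel matrix merges the requested row of the
   diagonal block (local columns shifted by cstart) with the row of the
   off-diagonal block (local columns translated to global through garray),
   returning the entries in increasing global column order; the scratch arrays
   rowvalues/rowindices are allocated once, sized for the longest local row.
*/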
1242 #define __FUNCT__ "MatGetRow_MPIAIJ"
1243 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1244 {
1245   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1246   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1247   PetscErrorCode ierr;
1248   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1249   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1250   PetscInt       *cmap,*idx_p;
1251 
1252   PetscFunctionBegin;
1253   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1254   mat->getrowactive = PETSC_TRUE;
1255 
1256   if (!mat->rowvalues && (idx || v)) {
1257     /*
1258         allocate enough space to hold information from the longest row.
1259     */
1260     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1261     PetscInt     max = 1,tmp;
1262     for (i=0; i<matin->m; i++) {
1263       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1264       if (max < tmp) { max = tmp; }
1265     }
1266     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1267     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1268   }
1269 
1270   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1271   lrow = row - rstart;
1272 
1273   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1274   if (!v)   {pvA = 0; pvB = 0;}
1275   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1276   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1277   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1278   nztot = nzA + nzB;
1279 
1280   cmap  = mat->garray;
1281   if (v  || idx) {
1282     if (nztot) {
1283       /* Sort by increasing column numbers, assuming A and B already sorted */
1284       PetscInt imark = -1;
1285       if (v) {
1286         *v = v_p = mat->rowvalues;
1287         for (i=0; i<nzB; i++) {
1288           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1289           else break;
1290         }
1291         imark = i;
1292         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1293         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1294       }
1295       if (idx) {
1296         *idx = idx_p = mat->rowindices;
1297         if (imark > -1) {
1298           for (i=0; i<imark; i++) {
1299             idx_p[i] = cmap[cworkB[i]];
1300           }
1301         } else {
1302           for (i=0; i<nzB; i++) {
1303             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1304             else break;
1305           }
1306           imark = i;
1307         }
1308         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1309         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1310       }
1311     } else {
1312       if (idx) *idx = 0;
1313       if (v)   *v   = 0;
1314     }
1315   }
1316   *nz = nztot;
1317   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1318   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1324 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1325 {
1326   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1327 
1328   PetscFunctionBegin;
1329   if (!aij->getrowactive) {
1330     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1331   }
1332   aij->getrowactive = PETSC_FALSE;
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatNorm_MPIAIJ"
1338 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1339 {
1340   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1341   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1342   PetscErrorCode ierr;
1343   PetscInt       i,j,cstart = aij->cstart;
1344   PetscReal      sum = 0.0;
1345   PetscScalar    *v;
1346 
1347   PetscFunctionBegin;
1348   if (aij->size == 1) {
1349     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1350   } else {
1351     if (type == NORM_FROBENIUS) {
1352       v = amat->a;
1353       for (i=0; i<amat->nz; i++) {
1354 #if defined(PETSC_USE_COMPLEX)
1355         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1356 #else
1357         sum += (*v)*(*v); v++;
1358 #endif
1359       }
1360       v = bmat->a;
1361       for (i=0; i<bmat->nz; i++) {
1362 #if defined(PETSC_USE_COMPLEX)
1363         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1364 #else
1365         sum += (*v)*(*v); v++;
1366 #endif
1367       }
1368       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1369       *norm = sqrt(*norm);
1370     } else if (type == NORM_1) { /* max column norm */
1371       PetscReal *tmp,*tmp2;
1372       PetscInt    *jj,*garray = aij->garray;
1373       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1374       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1375       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1376       *norm = 0.0;
1377       v = amat->a; jj = amat->j;
1378       for (j=0; j<amat->nz; j++) {
1379         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1380       }
1381       v = bmat->a; jj = bmat->j;
1382       for (j=0; j<bmat->nz; j++) {
1383         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1384       }
1385       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1386       for (j=0; j<mat->N; j++) {
1387         if (tmp2[j] > *norm) *norm = tmp2[j];
1388       }
1389       ierr = PetscFree(tmp);CHKERRQ(ierr);
1390       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1391     } else if (type == NORM_INFINITY) { /* max row norm */
1392       PetscReal ntemp = 0.0;
1393       for (j=0; j<aij->A->m; j++) {
1394         v = amat->a + amat->i[j];
1395         sum = 0.0;
1396         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1397           sum += PetscAbsScalar(*v); v++;
1398         }
1399         v = bmat->a + bmat->i[j];
1400         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1401           sum += PetscAbsScalar(*v); v++;
1402         }
1403         if (sum > ntemp) ntemp = sum;
1404       }
1405       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1406     } else {
1407       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1408     }
1409   }
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatTranspose_MPIAIJ"
1415 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1416 {
1417   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1418   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1419   PetscErrorCode ierr;
1420   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1421   Mat            B;
1422   PetscScalar    *array;
1423 
1424   PetscFunctionBegin;
1425   if (!matout && M != N) {
1426     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1427   }
1428 
1429   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1430   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1431   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1432 
1433   /* copy over the A part */
1434   Aloc = (Mat_SeqAIJ*)a->A->data;
1435   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1436   row = a->rstart;
1437   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1438   for (i=0; i<m; i++) {
1439     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1440     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1441   }
1442   aj = Aloc->j;
1443   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1444 
1445   /* copy over the B part */
1446   Aloc = (Mat_SeqAIJ*)a->B->data;
1447   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1448   row  = a->rstart;
1449   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1450   ct   = cols;
1451   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1452   for (i=0; i<m; i++) {
1453     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1454     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1455   }
1456   ierr = PetscFree(ct);CHKERRQ(ierr);
1457   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1458   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1459   if (matout) {
1460     *matout = B;
1461   } else {
1462     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1463   }
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1469 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1470 {
1471   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1472   Mat            a = aij->A,b = aij->B;
1473   PetscErrorCode ierr;
1474   PetscInt       s1,s2,s3;
1475 
1476   PetscFunctionBegin;
1477   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1478   if (rr) {
1479     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1480     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1481     /* Overlap communication with computation. */
1482     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1483   }
1484   if (ll) {
1485     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1486     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1487     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1488   }
1489   /* scale  the diagonal block */
1490   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1491 
1492   if (rr) {
1493     /* Do a scatter end and then right scale the off-diagonal block */
1494     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1495     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1496   }
1497 
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1504 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1505 {
1506   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1507   PetscErrorCode ierr;
1508 
1509   PetscFunctionBegin;
1510   if (!a->rank) {
1511     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1512   }
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1518 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1519 {
1520   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1521   PetscErrorCode ierr;
1522 
1523   PetscFunctionBegin;
1524   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1525   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1526   PetscFunctionReturn(0);
1527 }
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1530 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1531 {
1532   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1533   PetscErrorCode ierr;
1534 
1535   PetscFunctionBegin;
1536   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1537   PetscFunctionReturn(0);
1538 }
1539 
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatEqual_MPIAIJ"
1542 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1543 {
1544   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1545   Mat            a,b,c,d;
1546   PetscTruth     flg;
1547   PetscErrorCode ierr;
1548 
1549   PetscFunctionBegin;
1550   a = matA->A; b = matA->B;
1551   c = matB->A; d = matB->B;
1552 
1553   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1554   if (flg) {
1555     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1556   }
1557   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatCopy_MPIAIJ"
1563 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1564 {
1565   PetscErrorCode ierr;
1566   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1567   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1568 
1569   PetscFunctionBegin;
1570   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1571   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1572     /* Because of the column compression in the off-processor part of the matrix a->B,
1573        the number of columns in a->B and b->B may be different; hence we cannot call
1574        MatCopy() directly on the two parts. If need be, a copy more efficient than
1575        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1576        then copying the submatrices. */
1577     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1578   } else {
1579     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1580     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1581   }
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1587 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1588 {
1589   PetscErrorCode ierr;
1590 
1591   PetscFunctionBegin;
1592   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1593   PetscFunctionReturn(0);
1594 }
1595 
1596 #include "petscblaslapack.h"
1597 #undef __FUNCT__
1598 #define __FUNCT__ "MatAXPY_MPIAIJ"
1599 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1600 {
1601   PetscErrorCode ierr;
1602   PetscInt       i;
1603   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1604   PetscBLASInt   bnz,one=1;
1605   Mat_SeqAIJ     *x,*y;
1606 
1607   PetscFunctionBegin;
1608   if (str == SAME_NONZERO_PATTERN) {
1609     x = (Mat_SeqAIJ *)xx->A->data;
1610     y = (Mat_SeqAIJ *)yy->A->data;
1611     bnz = (PetscBLASInt)x->nz;
1612     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1613     x = (Mat_SeqAIJ *)xx->B->data;
1614     y = (Mat_SeqAIJ *)yy->B->data;
1615     bnz = (PetscBLASInt)x->nz;
1616     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1617   } else if (str == SUBSET_NONZERO_PATTERN) {
1618     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1619 
1620     x = (Mat_SeqAIJ *)xx->B->data;
1621     y = (Mat_SeqAIJ *)yy->B->data;
1622     if (y->xtoy && y->XtoY != xx->B) {
1623       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1624       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1625     }
1626     if (!y->xtoy) { /* get xtoy */
1627       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1628       y->XtoY = xx->B;
1629     }
1630     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1631   } else {
1632     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1633   }
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 /* -------------------------------------------------------------------*/
1638 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1639        MatGetRow_MPIAIJ,
1640        MatRestoreRow_MPIAIJ,
1641        MatMult_MPIAIJ,
1642 /* 4*/ MatMultAdd_MPIAIJ,
1643        MatMultTranspose_MPIAIJ,
1644        MatMultTransposeAdd_MPIAIJ,
1645        0,
1646        0,
1647        0,
1648 /*10*/ 0,
1649        0,
1650        0,
1651        MatRelax_MPIAIJ,
1652        MatTranspose_MPIAIJ,
1653 /*15*/ MatGetInfo_MPIAIJ,
1654        MatEqual_MPIAIJ,
1655        MatGetDiagonal_MPIAIJ,
1656        MatDiagonalScale_MPIAIJ,
1657        MatNorm_MPIAIJ,
1658 /*20*/ MatAssemblyBegin_MPIAIJ,
1659        MatAssemblyEnd_MPIAIJ,
1660        0,
1661        MatSetOption_MPIAIJ,
1662        MatZeroEntries_MPIAIJ,
1663 /*25*/ MatZeroRows_MPIAIJ,
1664        0,
1665        0,
1666        0,
1667        0,
1668 /*30*/ MatSetUpPreallocation_MPIAIJ,
1669        0,
1670        0,
1671        0,
1672        0,
1673 /*35*/ MatDuplicate_MPIAIJ,
1674        0,
1675        0,
1676        0,
1677        0,
1678 /*40*/ MatAXPY_MPIAIJ,
1679        MatGetSubMatrices_MPIAIJ,
1680        MatIncreaseOverlap_MPIAIJ,
1681        MatGetValues_MPIAIJ,
1682        MatCopy_MPIAIJ,
1683 /*45*/ MatPrintHelp_MPIAIJ,
1684        MatScale_MPIAIJ,
1685        0,
1686        0,
1687        0,
1688 /*50*/ MatSetBlockSize_MPIAIJ,
1689        0,
1690        0,
1691        0,
1692        0,
1693 /*55*/ MatFDColoringCreate_MPIAIJ,
1694        0,
1695        MatSetUnfactored_MPIAIJ,
1696        0,
1697        0,
1698 /*60*/ MatGetSubMatrix_MPIAIJ,
1699        MatDestroy_MPIAIJ,
1700        MatView_MPIAIJ,
1701        MatGetPetscMaps_Petsc,
1702        0,
1703 /*65*/ 0,
1704        0,
1705        0,
1706        0,
1707        0,
1708 /*70*/ 0,
1709        0,
1710        MatSetColoring_MPIAIJ,
1711 #if defined(PETSC_HAVE_ADIC)
1712        MatSetValuesAdic_MPIAIJ,
1713 #else
1714        0,
1715 #endif
1716        MatSetValuesAdifor_MPIAIJ,
1717 /*75*/ 0,
1718        0,
1719        0,
1720        0,
1721        0,
1722 /*80*/ 0,
1723        0,
1724        0,
1725        0,
1726 /*84*/ MatLoad_MPIAIJ,
1727        0,
1728        0,
1729        0,
1730        0,
1731        0,
1732 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1733        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1734        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1735        MatPtAP_Basic,
1736        MatPtAPSymbolic_MPIAIJ,
1737 /*95*/ MatPtAPNumeric_MPIAIJ,
1738        0,
1739        0,
1740        0,
1741        0,
1742 /*100*/0,
1743        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1744        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1745 };
1746 
1747 /* ----------------------------------------------------------------------------------------*/
1748 
1749 EXTERN_C_BEGIN
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1752 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1753 {
1754   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1755   PetscErrorCode ierr;
1756 
1757   PetscFunctionBegin;
1758   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1759   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1760   PetscFunctionReturn(0);
1761 }
1762 EXTERN_C_END
1763 
1764 EXTERN_C_BEGIN
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1767 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1768 {
1769   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1770   PetscErrorCode ierr;
1771 
1772   PetscFunctionBegin;
1773   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1774   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 EXTERN_C_END
1778 
1779 #include "petscpc.h"
1780 EXTERN_C_BEGIN
1781 #undef __FUNCT__
1782 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1783 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1784 {
1785   Mat_MPIAIJ     *b;
1786   PetscErrorCode ierr;
1787   PetscInt       i;
1788 
1789   PetscFunctionBegin;
1790   B->preallocated = PETSC_TRUE;
1791   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1792   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1793   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1794   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1795   if (d_nnz) {
1796     for (i=0; i<B->m; i++) {
1797       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1798     }
1799   }
1800   if (o_nnz) {
1801     for (i=0; i<B->m; i++) {
1802       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1803     }
1804   }
1805   b = (Mat_MPIAIJ*)B->data;
1806   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1807   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1808 
1809   PetscFunctionReturn(0);
1810 }
1811 EXTERN_C_END
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1815 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1816 {
1817   Mat            mat;
1818   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1819   PetscErrorCode ierr;
1820 
1821   PetscFunctionBegin;
1822   *newmat       = 0;
1823   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1824   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1825   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1826   a    = (Mat_MPIAIJ*)mat->data;
1827 
1828   mat->factor       = matin->factor;
1829   mat->bs           = matin->bs;
1830   mat->assembled    = PETSC_TRUE;
1831   mat->insertmode   = NOT_SET_VALUES;
1832   mat->preallocated = PETSC_TRUE;
1833 
1834   a->rstart       = oldmat->rstart;
1835   a->rend         = oldmat->rend;
1836   a->cstart       = oldmat->cstart;
1837   a->cend         = oldmat->cend;
1838   a->size         = oldmat->size;
1839   a->rank         = oldmat->rank;
1840   a->donotstash   = oldmat->donotstash;
1841   a->roworiented  = oldmat->roworiented;
1842   a->rowindices   = 0;
1843   a->rowvalues    = 0;
1844   a->getrowactive = PETSC_FALSE;
1845 
1846   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1847   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1848   if (oldmat->colmap) {
1849 #if defined (PETSC_USE_CTABLE)
1850     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1851 #else
1852     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1853     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1854     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1855 #endif
1856   } else a->colmap = 0;
1857   if (oldmat->garray) {
1858     PetscInt len;
1859     len  = oldmat->B->n;
1860     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1861     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1862     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1863   } else a->garray = 0;
1864 
1865   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1866   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1867   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1868   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1869   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1870   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1871   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1872   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1873   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1874   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1875   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1876   *newmat = mat;
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #include "petscsys.h"
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "MatLoad_MPIAIJ"
1884 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1885 {
1886   Mat            A;
1887   PetscScalar    *vals,*svals;
1888   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1889   MPI_Status     status;
1890   PetscErrorCode ierr;
1891   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1892   PetscInt       i,nz,j,rstart,rend;
1893   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1894   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1895   PetscInt       cend,cstart,n,*rowners;
1896   int            fd;
1897 
1898   PetscFunctionBegin;
1899   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1900   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1901   if (!rank) {
1902     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1903     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1904     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1905   }
1906 
1907   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1908   M = header[1]; N = header[2];
1909   /* determine ownership of all rows */
1910   m    = M/size + ((M % size) > rank);
1911   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1912   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1913   rowners[0] = 0;
1914   for (i=2; i<=size; i++) {
1915     rowners[i] += rowners[i-1];
1916   }
1917   rstart = rowners[rank];
1918   rend   = rowners[rank+1];
1919 
1920   /* distribute row lengths to all processors */
1921   ierr    = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
1922   if (!rank) {
1923     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1924     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1925     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1926     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1927     for (j=0; j<m; j++) {
1928       procsnz[0] += ourlens[j];
1929     }
1930     for (i=1; i<size; i++) {
1931       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1932       /* calculate the number of nonzeros on each processor */
1933       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1934         procsnz[i] += rowlengths[j];
1935       }
1936       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1937     }
1938     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1939   } else {
1940     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1941   }
1942 
1943   if (!rank) {
1944     /* determine max buffer needed and allocate it */
1945     maxnz = 0;
1946     for (i=0; i<size; i++) {
1947       maxnz = PetscMax(maxnz,procsnz[i]);
1948     }
1949     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1950 
1951     /* read in my part of the matrix column indices  */
1952     nz   = procsnz[0];
1953     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1954     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1955 
1956     /* read in everyone else's and ship them off */
1957     for (i=1; i<size; i++) {
1958       nz   = procsnz[i];
1959       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1960       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1961     }
1962     ierr = PetscFree(cols);CHKERRQ(ierr);
1963   } else {
1964     /* determine buffer space needed for message */
1965     nz = 0;
1966     for (i=0; i<m; i++) {
1967       nz += ourlens[i];
1968     }
1969     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1970 
1971     /* receive message of column indices*/
1972     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1973     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1974     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1975   }
1976 
1977   /* determine column ownership if matrix is not square */
1978   if (N != M) {
1979     n      = N/size + ((N % size) > rank);
1980     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1981     cstart = cend - n;
1982   } else {
1983     cstart = rstart;
1984     cend   = rend;
1985     n      = cend - cstart;
1986   }
1987 
1988   /* loop over local rows, determining number of off diagonal entries */
1989   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1990   jj = 0;
1991   for (i=0; i<m; i++) {
1992     for (j=0; j<ourlens[i]; j++) {
1993       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1994       jj++;
1995     }
1996   }
1997 
1998   /* create our matrix */
1999   for (i=0; i<m; i++) {
2000     ourlens[i] -= offlens[i];
2001   }
2002   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2003   ierr = MatSetType(A,type);CHKERRQ(ierr);
2004   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2005 
2006   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2007   for (i=0; i<m; i++) {
2008     ourlens[i] += offlens[i];
2009   }
2010 
2011   if (!rank) {
2012     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2013 
2014     /* read in my part of the matrix numerical values  */
2015     nz   = procsnz[0];
2016     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2017 
2018     /* insert into matrix */
2019     jj      = rstart;
2020     smycols = mycols;
2021     svals   = vals;
2022     for (i=0; i<m; i++) {
2023       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2024       smycols += ourlens[i];
2025       svals   += ourlens[i];
2026       jj++;
2027     }
2028 
2029     /* read in other processors and ship out */
2030     for (i=1; i<size; i++) {
2031       nz   = procsnz[i];
2032       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2033       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2034     }
2035     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2036   } else {
2037     /* receive numeric values */
2038     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2039 
2040     /* receive message of values*/
2041     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2042     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2043     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2044 
2045     /* insert into matrix */
2046     jj      = rstart;
2047     smycols = mycols;
2048     svals   = vals;
2049     for (i=0; i<m; i++) {
2050       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2051       smycols += ourlens[i];
2052       svals   += ourlens[i];
2053       jj++;
2054     }
2055   }
2056   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2057   ierr = PetscFree(vals);CHKERRQ(ierr);
2058   ierr = PetscFree(mycols);CHKERRQ(ierr);
2059   ierr = PetscFree(rowners);CHKERRQ(ierr);
2060 
2061   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2062   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2063   *newmat = A;
2064   PetscFunctionReturn(0);
2065 }
2066 
2067 #undef __FUNCT__
2068 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2069 /*
2070     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
2071   on each process, and then the end result obtained by concatenating the
2072   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2073 */
2074 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2075 {
2076   PetscErrorCode ierr;
2077   PetscMPIInt    rank,size;
2078   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2079   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2080   Mat            *local,M,Mreuse;
2081   PetscScalar    *vwork,*aa;
2082   MPI_Comm       comm = mat->comm;
2083   Mat_SeqAIJ     *aij;
2084 
2085 
2086   PetscFunctionBegin;
2087   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2088   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2089 
2090   if (call ==  MAT_REUSE_MATRIX) {
2091     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2092     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2093     local = &Mreuse;
2094     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2095   } else {
2096     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2097     Mreuse = *local;
2098     ierr   = PetscFree(local);CHKERRQ(ierr);
2099   }
2100 
2101   /*
2102       m - number of local rows
2103       n - number of columns (same on all processors)
2104       rstart - first row in new global matrix generated
2105   */
2106   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2107   if (call == MAT_INITIAL_MATRIX) {
2108     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2109     ii  = aij->i;
2110     jj  = aij->j;
2111 
2112     /*
2113         Determine the number of non-zeros in the diagonal and off-diagonal
2114         portions of the matrix in order to do correct preallocation
2115     */
2116 
2117     /* first get start and end of "diagonal" columns */
2118     if (csize == PETSC_DECIDE) {
2119       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2120       if (mglobal == n) { /* square matrix */
2121         nlocal = m;
2122       } else {
2123         nlocal = n/size + ((n % size) > rank);
2124       }
2125     } else {
2126       nlocal = csize;
2127     }
2128     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2129     rstart = rend - nlocal;
2130     if (rank == size - 1 && rend != n) {
2131       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2132     }
2133 
2134     /* next, compute all the lengths */
2135     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2136     olens = dlens + m;
2137     for (i=0; i<m; i++) {
2138       jend = ii[i+1] - ii[i];
2139       olen = 0;
2140       dlen = 0;
2141       for (j=0; j<jend; j++) {
2142         if (*jj < rstart || *jj >= rend) olen++;
2143         else dlen++;
2144         jj++;
2145       }
2146       olens[i] = olen;
2147       dlens[i] = dlen;
2148     }
2149     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2150     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2151     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2152     ierr = PetscFree(dlens);CHKERRQ(ierr);
2153   } else {
2154     PetscInt ml,nl;
2155 
2156     M = *newmat;
2157     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2158     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2159     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2160     /*
2161          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2162        rather than the slower MatSetValues().
2163     */
2164     M->was_assembled = PETSC_TRUE;
2165     M->assembled     = PETSC_FALSE;
2166   }
2167   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2168   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2169   ii  = aij->i;
2170   jj  = aij->j;
2171   aa  = aij->a;
2172   for (i=0; i<m; i++) {
2173     row   = rstart + i;
2174     nz    = ii[i+1] - ii[i];
2175     cwork = jj;     jj += nz;
2176     vwork = aa;     aa += nz;
2177     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2178   }
2179 
2180   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2181   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2182   *newmat = M;
2183 
2184   /* save the submatrix used on this processor for the next request */
2185   if (call ==  MAT_INITIAL_MATRIX) {
2186     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2187     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2188   }
2189 
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 EXTERN_C_BEGIN
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2196 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2197 {
2198   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2199   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2200   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2201   const PetscInt *JJ;
2202   PetscScalar    *values;
2203   PetscErrorCode ierr;
2204 
2205   PetscFunctionBegin;
2206 #if defined(PETSC_OPT_g)
2207   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2208 #endif
2209   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2210   o_nnz = d_nnz + m;
2211 
2212   for (i=0; i<m; i++) {
2213     nnz     = I[i+1]- I[i];
2214     JJ      = J + I[i];
2215     nnz_max = PetscMax(nnz_max,nnz);
2216 #if defined(PETSC_OPT_g)
2217     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2218 #endif
2219     for (j=0; j<nnz; j++) {
2220       if (*JJ >= cstart) break;
2221       JJ++;
2222     }
2223     d = 0;
2224     for (; j<nnz; j++) {
2225       if (*JJ++ >= cend) break;
2226       d++;
2227     }
2228     d_nnz[i] = d;
2229     o_nnz[i] = nnz - d;
2230   }
2231   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2232   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2233 
2234   if (v) values = (PetscScalar*)v;
2235   else {
2236     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2237     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2238   }
2239 
2240   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2241   for (i=0; i<m; i++) {
2242     ii   = i + rstart;
2243     nnz  = I[i+1]- I[i];
2244     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2245   }
2246   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2247   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2248   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2249 
2250   if (!v) {
2251     ierr = PetscFree(values);CHKERRQ(ierr);
2252   }
2253   PetscFunctionReturn(0);
2254 }
2255 EXTERN_C_END
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2259 /*@C
2260    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2261    (the default parallel PETSc format).
2262 
2263    Collective on MPI_Comm
2264 
2265    Input Parameters:
2266 +  A - the matrix
2267 .  i - the indices into j for the start of each local row (starts with zero)
2268 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2269 -  v - optional values in the matrix
2270 
2271    Level: developer
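   Example usage (an illustrative sketch only, not taken from the PETSc examples;
   it assumes a run on 2 processes, each owning 2 rows of a 4x4 matrix, and the
   arrays ia, ja, va below are made-up local CSR data).  Note that this routine
   preallocates the matrix, inserts the given values (zeros if v is PETSC_NULL),
   and assembles the matrix:
.vb
   Mat         A;
   PetscInt    ia[] = {0,2,4};
   PetscInt    ja[] = {0,1,1,2};
   PetscScalar va[] = {1.0,2.0,3.0,4.0};

   MatCreate(PETSC_COMM_WORLD,2,2,4,4,&A);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocationCSR(A,ia,ja,va);
.ve
   Here ia holds the start of each local row, ja holds the sorted global column
   indices, and va holds the matching numerical values; error checking is omitted.
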
2272 
2273 .keywords: matrix, aij, compressed row, sparse, parallel
2274 
2275 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2276 @*/
2277 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2278 {
2279   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2280 
2281   PetscFunctionBegin;
2282   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2283   if (f) {
2284     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2285   }
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #undef __FUNCT__
2290 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2291 /*@C
2292    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2293    (the default parallel PETSc format).  For good matrix assembly performance
2294    the user should preallocate the matrix storage by setting the parameters
2295    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2296    performance can be increased by more than a factor of 50.
2297 
2298    Collective on MPI_Comm
2299 
2300    Input Parameters:
2301 +  A - the matrix
2302 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2303            (same value is used for all local rows)
2304 .  d_nnz - array containing the number of nonzeros in the various rows of the
2305            DIAGONAL portion of the local submatrix (possibly different for each row)
2306            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2307            The size of this array is equal to the number of local rows, i.e 'm'.
2308            You must leave room for the diagonal entry even if it is zero.
2309 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2310            submatrix (same value is used for all local rows).
2311 -  o_nnz - array containing the number of nonzeros in the various rows of the
2312            OFF-DIAGONAL portion of the local submatrix (possibly different for
2313            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2314            structure. The size of this array is equal to the number
2315            of local rows, i.e 'm'.
2316 
2317    If the *_nnz parameter is given then the *_nz parameter is ignored
2318 
2319    The AIJ format (also called the Yale sparse matrix format or
2320    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2321    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2322 
2323    The parallel matrix is partitioned such that the first m0 rows belong to
2324    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2325    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2326 
2327    The DIAGONAL portion of the local submatrix of a processor can be defined
2328    as the submatrix which is obtained by extracting the part corresponding
2329    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2330    first row that belongs to the processor, and r2 is the last row belonging
2331    to this processor. This is a square mxm matrix. The remaining portion
2332    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2333 
2334    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2335 
2336    Example usage:
2337 
2338    Consider the following 8x8 matrix with 34 non-zero values that is
2339    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2340    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2341    as follows:
2342 
2343 .vb
2344             1  2  0  |  0  3  0  |  0  4
2345     Proc0   0  5  6  |  7  0  0  |  8  0
2346             9  0 10  | 11  0  0  | 12  0
2347     -------------------------------------
2348            13  0 14  | 15 16 17  |  0  0
2349     Proc1   0 18  0  | 19 20 21  |  0  0
2350             0  0  0  | 22 23  0  | 24  0
2351     -------------------------------------
2352     Proc2  25 26 27  |  0  0 28  | 29  0
2353            30  0  0  | 31 32 33  |  0 34
2354 .ve
2355 
2356    This can be represented as a collection of submatrices as:
2357 
2358 .vb
2359       A B C
2360       D E F
2361       G H I
2362 .ve
2363 
2364    Where the submatrices A,B,C are owned by proc0, D,E,F are
2365    owned by proc1, G,H,I are owned by proc2.
2366 
2367    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2368    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2369    The 'M','N' parameters are 8,8, and have the same values on all procs.
2370 
2371    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2372    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2373    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2374    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2375    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2376    matrix, and [DF] as another SeqAIJ matrix.
2377 
2378    When d_nz, o_nz parameters are specified, d_nz storage elements are
2379    allocated for every row of the local diagonal submatrix, and o_nz
2380    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2381    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2382    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2383    In this case, the values of d_nz,o_nz are:
2384 .vb
2385      proc0 : dnz = 2, o_nz = 2
2386      proc1 : dnz = 3, o_nz = 2
2387      proc2 : dnz = 1, o_nz = 4
2388 .ve
2389    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2390    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2391    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2392    34 values.
2393 
2394    When d_nnz, o_nnz parameters are specified, the storage is specified
2395    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2396    In the above case the values for d_nnz,o_nnz are:
2397 .vb
2398      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2399      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2400      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2401 .ve
2402    Here the space allocated is the sum of all the above values, i.e. 34, and
2403    hence the preallocation is exact.
2404 
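   Example code (a minimal sketch for the 8x8 example above, run on the 3
   processes described there; the array names d0/o0 etc. and the per-rank
   branching are illustrative only, and error checking is omitted):
.vb
   Mat         A;
   PetscMPIInt rank;
   PetscInt    m;
   PetscInt    d0[] = {2,2,2}, o0[] = {2,2,2};
   PetscInt    d1[] = {3,3,2}, o1[] = {2,1,1};
   PetscInt    d2[] = {1,1},   o2[] = {4,4};

   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
   m = (rank < 2) ? 3 : 2;
   MatCreate(PETSC_COMM_WORLD,m,m,8,8,&A);
   MatSetType(A,MATMPIAIJ);
   if (rank == 0)      MatMPIAIJSetPreallocation(A,0,d0,0,o0);
   else if (rank == 1) MatMPIAIJSetPreallocation(A,0,d1,0,o1);
   else                MatMPIAIJSetPreallocation(A,0,d2,0,o2);
.ve
   after which the entries are inserted with MatSetValues() and the matrix is
   assembled with MatAssemblyBegin()/MatAssemblyEnd().
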
2405    Level: intermediate
2406 
2407 .keywords: matrix, aij, compressed row, sparse, parallel
2408 
2409 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2410           MPIAIJ
2411 @*/
2412 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2413 {
2414   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2415 
2416   PetscFunctionBegin;
2417   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2418   if (f) {
2419     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2420   }
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatCreateMPIAIJ"
2426 /*@C
2427    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2428    (the default parallel PETSc format).  For good matrix assembly performance
2429    the user should preallocate the matrix storage by setting the parameters
2430    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2431    performance can be increased by more than a factor of 50.
2432 
2433    Collective on MPI_Comm
2434 
2435    Input Parameters:
2436 +  comm - MPI communicator
2437 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2438            This value should be the same as the local size used in creating the
2439            y vector for the matrix-vector product y = Ax.
2440 .  n - This value should be the same as the local size used in creating the
2441        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2442        calculated if N is given) For square matrices n is almost always m.
2443 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2444 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2445 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2446            (same value is used for all local rows)
2447 .  d_nnz - array containing the number of nonzeros in the various rows of the
2448            DIAGONAL portion of the local submatrix (possibly different for each row)
2449            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2450            The size of this array is equal to the number of local rows, i.e 'm'.
2451            You must leave room for the diagonal entry even if it is zero.
2452 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2453            submatrix (same value is used for all local rows).
2454 -  o_nnz - array containing the number of nonzeros in the various rows of the
2455            OFF-DIAGONAL portion of the local submatrix (possibly different for
2456            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2457            structure. The size of this array is equal to the number
2458            of local rows, i.e 'm'.
2459 
2460    Output Parameter:
2461 .  A - the matrix
2462 
2463    Notes:
2464    If the *_nnz parameter is given then the *_nz parameter is ignored
2465 
2466    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2467    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2468    storage requirements for this matrix.
2469 
2470    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2471    processor then it must be used on all processors that share the object for
2472    that argument.
2473 
2474    The user MUST specify either the local or global matrix dimensions
2475    (possibly both).
2476 
2477    The parallel matrix is partitioned such that the first m0 rows belong to
2478    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2479    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2480 
2481    The DIAGONAL portion of the local submatrix of a processor can be defined
2482    as the submatrix which is obtained by extracting the part corresponding
2483    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2484    first row that belongs to the processor, and r2 is the last row belonging
2485    to this processor. This is a square mxm matrix. The remaining portion
2486    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2487 
2488    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2489 
2490    When calling this routine with a single process communicator, a matrix of
2491    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2492    type of communicator, use the construction mechanism:
2493      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2494 
2495    By default, this format uses inodes (identical nodes) when possible.
2496    We search for consecutive rows with the same nonzero structure, thereby
2497    reusing matrix information to achieve increased efficiency.
2498 
2499    Options Database Keys:
2500 +  -mat_aij_no_inode  - Do not use inodes
2501 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2502 -  -mat_aij_oneindex - Internally use indexing starting at 1
2503         rather than 0.  Note that when calling MatSetValues(),
2504         the user still MUST index entries starting at 0!
2505 
2506 
2507    Example usage:
2508 
2509    Consider the following 8x8 matrix with 34 non-zero values that is
2510    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2511    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2512    as follows:
2513 
2514 .vb
2515             1  2  0  |  0  3  0  |  0  4
2516     Proc0   0  5  6  |  7  0  0  |  8  0
2517             9  0 10  | 11  0  0  | 12  0
2518     -------------------------------------
2519            13  0 14  | 15 16 17  |  0  0
2520     Proc1   0 18  0  | 19 20 21  |  0  0
2521             0  0  0  | 22 23  0  | 24  0
2522     -------------------------------------
2523     Proc2  25 26 27  |  0  0 28  | 29  0
2524            30  0  0  | 31 32 33  |  0 34
2525 .ve
2526 
2527    This can be represented as a collection of submatrices as:
2528 
2529 .vb
2530       A B C
2531       D E F
2532       G H I
2533 .ve
2534 
2535    Where the submatrices A,B,C are owned by proc0, D,E,F are
2536    owned by proc1, G,H,I are owned by proc2.
2537 
2538    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2539    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2540    The 'M','N' parameters are 8,8, and have the same values on all procs.
2541 
2542    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2543    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2544    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2545    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2546    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2547    matrix, and [DF] as another SeqAIJ matrix.
2548 
2549    When d_nz, o_nz parameters are specified, d_nz storage elements are
2550    allocated for every row of the local diagonal submatrix, and o_nz
2551    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2552    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2553    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2554    In this case, the values of d_nz,o_nz are:
2555 .vb
2556      proc0 : dnz = 2, o_nz = 2
2557      proc1 : dnz = 3, o_nz = 2
2558      proc2 : dnz = 1, o_nz = 4
2559 .ve
2560    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2561    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2562    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2563    34 values.
2564 
2565    When d_nnz, o_nnz parameters are specified, the storage is specified
2566    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2567    In the above case the values for d_nnz,o_nnz are:
2568 .vb
2569      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2570      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2571      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2572 .ve
2573    Here the space allocated is the sum of all the above values, i.e. 34, and
2574    hence the preallocation is exact.
2575 
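   Example code (a minimal sketch for the 8x8 example above, run on the 3
   processes described there; the d_nz/o_nz values are the per-process maxima
   listed above, and error checking is omitted):
.vb
   Mat         A;
   PetscMPIInt rank;
   PetscInt    m,d_nz,o_nz;

   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
   m    = (rank < 2) ? 3 : 2;
   d_nz = (rank == 0) ? 2 : ((rank == 1) ? 3 : 1);
   o_nz = (rank == 2) ? 4 : 2;
   MatCreateMPIAIJ(PETSC_COMM_WORLD,m,m,8,8,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
.ve
   after which the entries are inserted with MatSetValues() and the matrix is
   assembled with MatAssemblyBegin()/MatAssemblyEnd().
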
2576    Level: intermediate
2577 
2578 .keywords: matrix, aij, compressed row, sparse, parallel
2579 
2580 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2581           MPIAIJ
2582 @*/
2583 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2584 {
2585   PetscErrorCode ierr;
2586   PetscMPIInt    size;
2587 
2588   PetscFunctionBegin;
2589   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2590   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2591   if (size > 1) {
2592     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2593     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2594   } else {
2595     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2596     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2597   }
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 #undef __FUNCT__
2602 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2603 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2604 {
2605   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2606 
2607   PetscFunctionBegin;
2608   *Ad     = a->A;
2609   *Ao     = a->B;
2610   *colmap = a->garray;
2611   PetscFunctionReturn(0);
2612 }
2613 
2614 #undef __FUNCT__
2615 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2616 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2617 {
2618   PetscErrorCode ierr;
2619   PetscInt       i;
2620   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2621 
2622   PetscFunctionBegin;
2623   if (coloring->ctype == IS_COLORING_LOCAL) {
2624     ISColoringValue *allcolors,*colors;
2625     ISColoring      ocoloring;
2626 
2627     /* set coloring for diagonal portion */
2628     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2629 
2630     /* set coloring for off-diagonal portion */
2631     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2632     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2633     for (i=0; i<a->B->n; i++) {
2634       colors[i] = allcolors[a->garray[i]];
2635     }
2636     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2637     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2638     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2639     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2640   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2641     ISColoringValue *colors;
2642     PetscInt        *larray;
2643     ISColoring      ocoloring;
2644 
2645     /* set coloring for diagonal portion */
2646     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2647     for (i=0; i<a->A->n; i++) {
2648       larray[i] = i + a->cstart;
2649     }
2650     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2651     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2652     for (i=0; i<a->A->n; i++) {
2653       colors[i] = coloring->colors[larray[i]];
2654     }
2655     ierr = PetscFree(larray);CHKERRQ(ierr);
2656     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2657     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2658     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2659 
2660     /* set coloring for off-diagonal portion */
2661     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2662     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2663     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2664     for (i=0; i<a->B->n; i++) {
2665       colors[i] = coloring->colors[larray[i]];
2666     }
2667     ierr = PetscFree(larray);CHKERRQ(ierr);
2668     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2669     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2670     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2671   } else {
2672     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2673   }
2674 
2675   PetscFunctionReturn(0);
2676 }
2677 
2678 #if defined(PETSC_HAVE_ADIC)
2679 #undef __FUNCT__
2680 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2681 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2682 {
2683   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2684   PetscErrorCode ierr;
2685 
2686   PetscFunctionBegin;
2687   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2688   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2689   PetscFunctionReturn(0);
2690 }
2691 #endif
2692 
2693 #undef __FUNCT__
2694 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2695 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2696 {
2697   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2698   PetscErrorCode ierr;
2699 
2700   PetscFunctionBegin;
2701   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2702   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2703   PetscFunctionReturn(0);
2704 }
2705 
2706 #undef __FUNCT__
2707 #define __FUNCT__ "MatMerge"
2708 /*@C
2709       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2710                  matrices from each processor
2711 
2712     Collective on MPI_Comm
2713 
2714    Input Parameters:
2715 +    comm - the communicator the parallel matrix will live on
2716 .    inmat - the input sequential matrix on each process
2717 .    n - number of local columns (or PETSC_DECIDE)
2718 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2719 
2720    Output Parameter:
2721 .    outmat - the parallel matrix generated
2722 
2723     Level: advanced
2724 
2725    Notes: The number of columns of the input matrix MUST be the same on EACH processor.
2726 
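   Example usage (an illustrative sketch only; Aloc stands for a SeqAIJ matrix
   that each process has already assembled, and error checking is omitted):
.vb
   Mat Aloc,C;
   MatMerge(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve
   Note that the input matrix is destroyed inside MatMerge(), so Aloc must not be
   used or destroyed again afterwards.
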
2727 @*/
2728 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2729 {
2730   PetscErrorCode ierr;
2731   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2732   PetscInt       *indx;
2733   PetscScalar    *values;
2734   PetscMap       columnmap,rowmap;
2735 
2736   PetscFunctionBegin;
2737   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2738   /*
2739   PetscMPIInt       rank;
2740   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2741   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2742   */
2743   if (scall == MAT_INITIAL_MATRIX){
2744     /* count the nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
2745     if (n == PETSC_DECIDE){
2746       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2747       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2748       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2749       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2750       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2751     }
2752 
2753     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2754     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2755     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2756     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2757     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2758 
2759     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2760     for (i=0;i<m;i++) {
2761       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2762       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2763       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2764     }
2765     /* This routine will ONLY return an MPIAIJ type matrix */
2766     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2767     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2768     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2769     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2770 
2771   } else if (scall == MAT_REUSE_MATRIX){
2772     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2773   } else {
2774     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2775   }
2776 
2777   for (i=0;i<m;i++) {
2778     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2779     I    = i + rstart;
2780     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2781     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2782   }
2783   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2784   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2785   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2786 
2787   PetscFunctionReturn(0);
2788 }
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatFileSplit"
2792 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2793 {
2794   PetscErrorCode    ierr;
2795   PetscMPIInt       rank;
2796   PetscInt          m,N,i,rstart,nnz;
2797   size_t            len;
2798   const PetscInt    *indx;
2799   PetscViewer       out;
2800   char              *name;
2801   Mat               B;
2802   const PetscScalar *values;
2803 
2804   PetscFunctionBegin;
2805   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2806   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2807   /* Should this be the type of the diagonal block of A? */
2808   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2809   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2810   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2811   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2812   for (i=0;i<m;i++) {
2813     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2814     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2815     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2816   }
2817   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2818   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2819 
2820   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2821   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2822   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2823   sprintf(name,"%s.%d",outfile,rank);
2824   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2825   ierr = PetscFree(name);
2826   ierr = MatView(B,out);CHKERRQ(ierr);
2827   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2828   ierr = MatDestroy(B);CHKERRQ(ierr);
2829   PetscFunctionReturn(0);
2830 }
2831 
2832 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2833 #undef __FUNCT__
2834 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2835 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2836 {
2837   PetscErrorCode       ierr;
2838   Mat_Merge_SeqsToMPI  *merge;
2839   PetscObjectContainer container;
2840 
2841   PetscFunctionBegin;
2842   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2843   if (container) {
2844     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2845     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2846     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2847     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2848     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2849     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2850     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2851     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2852     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2853     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2854     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2855     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2856 
2857     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2858     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2859     ierr = PetscFree(merge);CHKERRQ(ierr);  /* merge is valid only when the container was found */
2860   }
2861 
2862   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2863   PetscFunctionReturn(0);
2864 }
2865 
2866 #include "src/mat/utils/freespace.h"
2867 #include "petscbt.h"
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatMerge_SeqsToMPI"
2870 /*@C
2871       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2872                  matrices from each processor
2873 
2874     Collective on MPI_Comm
2875 
2876    Input Parameters:
2877 +    comm - the communicator the parallel matrix will live on
2878 .    seqmat - the input sequential matrix (one per process)
2879 .    m - number of local rows (or PETSC_DECIDE)
2880 .    n - number of local columns (or PETSC_DECIDE)
2881 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2882 
2883    Output Parameter:
2884 .    mpimat - the parallel matrix generated
2885 
2886     Level: advanced
2887 
2888    Notes:
2889      The dimensions of the sequential matrix on each processor MUST be the same.
2890      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI" and is
2891      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
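
   Example usage (a sketch only; each process is assumed to have assembled its own
   MATSEQAIJ matrix seqmat with identical global dimensions on every process):
.vb
     Mat seqmat,mpimat;
     /* ... assemble seqmat on each process ... */
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,
                               MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* entries contributed for the same (row,col) by different processes are added together */
.ve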
2892 @*/
2893 static PetscEvent logkey_seqstompinum = 0;
2894 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2895 {
2896   PetscErrorCode       ierr;
2897   MPI_Comm             comm=mpimat->comm;
2898   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2899   PetscMPIInt          size,rank,taga,*len_s;
2900   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2901   PetscInt             proc,m;
2902   PetscInt             **buf_ri,**buf_rj;
2903   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2904   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2905   MPI_Request          *s_waits,*r_waits;
2906   MPI_Status           *status;
2907   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2908   Mat_Merge_SeqsToMPI  *merge;
2909   PetscObjectContainer container;
2910 
2911   PetscFunctionBegin;
2912   if (!logkey_seqstompinum) {
2913     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2914   }
2915   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2916 
2917   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2918   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2919 
2920   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2921   if (container) {
2922     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2923   } else {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat does not have an attached MatMergeSeqsToMPI container");
  }
2924   bi     = merge->bi;
2925   bj     = merge->bj;
2926   buf_ri = merge->buf_ri;
2927   buf_rj = merge->buf_rj;
2928 
2929   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2930   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2931   len_s  = merge->len_s;
2932 
2933   /* send and recv matrix values */
2934   /*-----------------------------*/
2935   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2936   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2937 
2938   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2939   for (proc=0,k=0; proc<size; proc++){
2940     if (!len_s[proc]) continue;
2941     i = owners[proc];
2942     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2943     k++;
2944   }
2945 
2946   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2947   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2948   ierr = PetscFree(status);CHKERRQ(ierr);
2949 
2950   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2951   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2952 
2953   /* insert mat values of mpimat */
2954   /*----------------------------*/
2955   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2956   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2957   nextrow = buf_ri_k + merge->nrecv;
2958   nextai  = nextrow + merge->nrecv;
2959 
2960   for (k=0; k<merge->nrecv; k++){
2961     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2962     nrows = *(buf_ri_k[k]);
2963     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2964     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
2965   }
2966 
2967   /* set values of ba */
2968   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2969   for (i=0; i<m; i++) {
2970     arow = owners[rank] + i;
2971     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2972     bnzi = bi[i+1] - bi[i];
2973     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2974 
2975     /* add local non-zero vals of this proc's seqmat into ba */
2976     anzi = ai[arow+1] - ai[arow];
2977     aj   = a->j + ai[arow];
2978     aa   = a->a + ai[arow];
2979     nextaj = 0;
2980     for (j=0; nextaj<anzi; j++){
2981       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2982         ba_i[j] += aa[nextaj++];
2983       }
2984     }
2985 
2986     /* add received vals into ba */
2987     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2988       /* i-th row */
2989       if (i == *nextrow[k]) {
2990         anzi = *(nextai[k]+1) - *nextai[k];
2991         aj   = buf_rj[k] + *(nextai[k]);
2992         aa   = abuf_r[k] + *(nextai[k]);
2993         nextaj = 0;
2994         for (j=0; nextaj<anzi; j++){
2995           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2996             ba_i[j] += aa[nextaj++];
2997           }
2998         }
2999         nextrow[k]++; nextai[k]++;
3000       }
3001     }
3002     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3003   }
3004   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3005   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3006 
3007   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3008   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3009   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3010   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3011   PetscFunctionReturn(0);
3012 }
3013 static PetscEvent logkey_seqstompisym = 0;
3014 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3015 {
3016   PetscErrorCode       ierr;
3017   Mat                  B_mpi;
3018   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3019   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3020   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3021   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3022   PetscInt             len,proc,*dnz,*onz;
3023   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3024   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3025   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3026   MPI_Status           *status;
3027   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3028   PetscBT              lnkbt;
3029   Mat_Merge_SeqsToMPI  *merge;
3030   PetscObjectContainer container;
3031 
3032   PetscFunctionBegin;
3033   if (!logkey_seqstompisym) {
3034     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3035   }
3036   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3037 
3038   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3039   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3040 
3041   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3042   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3043 
3044   /* determine row ownership */
3045   /*---------------------------------------------------------*/
3046   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3047   if (m == PETSC_DECIDE) {
3048     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3049   } else {
3050     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3051   }
3052   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3053   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3054   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3055 
3056   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3057   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3058 
3059   /* determine the number of messages to send, their lengths */
3060   /*---------------------------------------------------------*/
3061   len_s  = merge->len_s;
3062 
3063   len = 0;  /* length of buf_si[] */
3064   merge->nsend = 0;
3065   for (proc=0; proc<size; proc++){
3066     len_si[proc] = 0;
3067     if (proc == rank){
3068       len_s[proc] = 0;
3069     } else {
3070       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3071       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3072     }
3073     if (len_s[proc]) {
3074       merge->nsend++;
3075       nrows = 0;
3076       for (i=owners[proc]; i<owners[proc+1]; i++){
3077         if (ai[i+1] > ai[i]) nrows++;
3078       }
3079       len_si[proc] = 2*(nrows+1);
3080       len += len_si[proc];
3081     }
3082   }
3083 
3084   /* determine the number and length of messages to receive for ij-structure */
3085   /*-------------------------------------------------------------------------*/
3086   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3087   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3088 
3089   /* post the Irecv of j-structure */
3090   /*-------------------------------*/
3091   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3092   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3093 
3094   /* post the Isend of j-structure */
3095   /*--------------------------------*/
3096   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3097   sj_waits = si_waits + merge->nsend;
3098 
3099   for (proc=0, k=0; proc<size; proc++){
3100     if (!len_s[proc]) continue;
3101     i = owners[proc];
3102     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3103     k++;
3104   }
3105 
3106   /* receives and sends of j-structure are complete */
3107   /*------------------------------------------------*/
3108   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3109   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3110 
3111   /* send and recv i-structure */
3112   /*---------------------------*/
3113   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3114   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3115 
3116   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3117   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3118   for (proc=0,k=0; proc<size; proc++){
3119     if (!len_s[proc]) continue;
3120     /* form outgoing message for i-structure:
3121          buf_si[0]:                 nrows to be sent
3122                [1:nrows]:           row index (local to the receiving process)
3123                [nrows+1:2*nrows+1]: i-structure index
3124     */
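    /* Illustration (hypothetical numbers): if two nonempty rows, with indices 3 and 5
       local to the receiving process and 4 and 2 nonzeros respectively, are sent to [proc],
       then buf_si = [2, 3, 5, 0, 4, 6], i.e. nrows, the local row indices, followed by the
       running nonzero offsets that form the i-structure of the message. */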
3125     /*-------------------------------------------*/
3126     nrows = len_si[proc]/2 - 1;
3127     buf_si_i    = buf_si + nrows+1;
3128     buf_si[0]   = nrows;
3129     buf_si_i[0] = 0;
3130     nrows = 0;
3131     for (i=owners[proc]; i<owners[proc+1]; i++){
3132       anzi = ai[i+1] - ai[i];
3133       if (anzi) {
3134         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3135         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3136         nrows++;
3137       }
3138     }
3139     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3140     k++;
3141     buf_si += len_si[proc];
3142   }
3143 
3144   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3145   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3146 
3147   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3148   for (i=0; i<merge->nrecv; i++){
3149     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3150   }
3151 
3152   ierr = PetscFree(len_si);CHKERRQ(ierr);
3153   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3154   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3155   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3156   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3157   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3158   ierr = PetscFree(status);CHKERRQ(ierr);
3159 
3160   /* compute a local seq matrix in each processor */
3161   /*----------------------------------------------*/
3162   /* allocate bi array and free space for accumulating nonzero column info */
3163   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3164   bi[0] = 0;
3165 
3166   /* create and initialize a linked list */
3167   nlnk = N+1;
3168   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3169 
3170   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3172   len  = ai[owners[rank+1]] - ai[owners[rank]];  /* num of nonzeros in the locally owned rows of seqmat */
3173   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3174   current_space = free_space;
3175 
3176   /* determine symbolic info for each local row */
3177   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3178   nextrow = buf_ri_k + merge->nrecv;
3179   nextai  = nextrow + merge->nrecv;
3180   for (k=0; k<merge->nrecv; k++){
3181     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3182     nrows = *buf_ri_k[k];
3183     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3184     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3185   }
3186 
3187   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3188   len = 0;
3189   for (i=0;i<m;i++) {
3190     bnzi   = 0;
3191     /* add local non-zero cols of this proc's seqmat into lnk */
3192     arow   = owners[rank] + i;
3193     anzi   = ai[arow+1] - ai[arow];
3194     aj     = a->j + ai[arow];
3195     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3196     bnzi += nlnk;
3197     /* add received col data into lnk */
3198     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3199       if (i == *nextrow[k]) { /* i-th row */
3200         anzi = *(nextai[k]+1) - *nextai[k];
3201         aj   = buf_rj[k] + *nextai[k];
3202         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3203         bnzi += nlnk;
3204         nextrow[k]++; nextai[k]++;
3205       }
3206     }
3207     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3208 
3209     /* if free space is not available, make more free space */
3210     if (current_space->local_remaining<bnzi) {
3211       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3212       nspacedouble++;
3213     }
3214     /* copy data into free space, then initialize lnk */
3215     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3216     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3217 
3218     current_space->array           += bnzi;
3219     current_space->local_used      += bnzi;
3220     current_space->local_remaining -= bnzi;
3221 
3222     bi[i+1] = bi[i] + bnzi;
3223   }
3224 
3225   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3226 
3227   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3228   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3229   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3230 
3231   /* create symbolic parallel matrix B_mpi */
3232   /*---------------------------------------*/
3233   if (n==PETSC_DECIDE) {
3234     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3235   } else {
3236     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3237   }
3238   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3239   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3240   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3241 
3242   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3243   B_mpi->assembled     = PETSC_FALSE;
3244   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3245   merge->bi            = bi;
3246   merge->bj            = bj;
3247   merge->buf_ri        = buf_ri;
3248   merge->buf_rj        = buf_rj;
3249   merge->coi           = PETSC_NULL;
3250   merge->coj           = PETSC_NULL;
3251   merge->owners_co     = PETSC_NULL;
3252 
3253   /* attach the supporting struct to B_mpi for reuse */
3254   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3255   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3256   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3257   *mpimat = B_mpi;
3258   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3259   PetscFunctionReturn(0);
3260 }
3261 
3262 static PetscEvent logkey_seqstompi = 0;
3263 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3264 {
3265   PetscErrorCode   ierr;
3266 
3267   PetscFunctionBegin;
3268   if (!logkey_seqstompi) {
3269     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3270   }
3271   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3272   if (scall == MAT_INITIAL_MATRIX){
3273     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3274   }
3275   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3276   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3277   PetscFunctionReturn(0);
3278 }
3279 static PetscEvent logkey_getlocalmat = 0;
3280 #undef __FUNCT__
3281 #define __FUNCT__ "MatGetLocalMat"
3282 /*@C
3283      MatGetLocalMat - Creates a SeqAIJ matrix from all the local rows of the MPIAIJ matrix A
3284 
3285     Not Collective
3286 
3287    Input Parameters:
3288 +    A - the matrix
3289 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3290 
3291    Output Parameter:
3292 .    A_loc - the local sequential matrix generated
3293 
3294     Level: developer
3295 
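   Example usage (a sketch only; A is assumed to be an assembled MPIAIJ matrix):
.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc, which has A->m rows and A->N columns ... */
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);  /* refresh the values only */
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve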
3296 @*/
3297 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3298 {
3299   PetscErrorCode  ierr;
3300   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3301   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3302   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3303   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3304   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3305   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3306 
3307   PetscFunctionBegin;
3308   if (!logkey_getlocalmat) {
3309     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3310   }
3311   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3312   if (scall == MAT_INITIAL_MATRIX){
3313     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3314     ci[0] = 0;
3315     for (i=0; i<am; i++){
3316       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3317     }
3318     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3319     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3320     k = 0;
3321     for (i=0; i<am; i++) {
3322       ncols_o = bi[i+1] - bi[i];
3323       ncols_d = ai[i+1] - ai[i];
3324       /* off-diagonal portion of A */
3325       for (jo=0; jo<ncols_o; jo++) {
3326         col = cmap[*bj];
3327         if (col >= cstart) break;
3328         cj[k]   = col; bj++;
3329         ca[k++] = *ba++;
3330       }
3331       /* diagonal portion of A */
3332       for (j=0; j<ncols_d; j++) {
3333         cj[k]   = cstart + *aj++;
3334         ca[k++] = *aa++;
3335       }
3336       /* off-diagonal portion of A */
3337       for (j=jo; j<ncols_o; j++) {
3338         cj[k]   = cmap[*bj++];
3339         ca[k++] = *ba++;
3340       }
3341     }
3342     /* put together the new matrix */
3343     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3344     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3345     /* Since these are PETSc arrays, change flags to free them as necessary. */
3346     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3347     mat->freedata = PETSC_TRUE;
3348     mat->nonew    = 0;
3349   } else if (scall == MAT_REUSE_MATRIX){
3350     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3351     ci = mat->i; cj = mat->j; ca = mat->a;
3352     for (i=0; i<am; i++) {
3353       /* off-diagonal portion of A */
3354       ncols_o = bi[i+1] - bi[i];
3355       for (jo=0; jo<ncols_o; jo++) {
3356         col = cmap[*bj];
3357         if (col >= cstart) break;
3358         *ca++ = *ba++; bj++;
3359       }
3360       /* diagonal portion of A */
3361       ncols_d = ai[i+1] - ai[i];
3362       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3363       /* off-diagonal portion of A */
3364       for (j=jo; j<ncols_o; j++) {
3365         *ca++ = *ba++; bj++;
3366       }
3367     }
3368   } else {
3369     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3370   }
3371 
3372   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3373   PetscFunctionReturn(0);
3374 }
3375 
3376 static PetscEvent logkey_getlocalmatcondensed = 0;
3377 #undef __FUNCT__
3378 #define __FUNCT__ "MatGetLocalMatCondensed"
3379 /*@C
3380      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and NON-ZERO columns of the MPIAIJ matrix A
3381 
3382     Not Collective
3383 
3384    Input Parameters:
3385 +    A - the matrix
3386 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3387 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3388 
3389    Output Parameter:
3390 .    A_loc - the local sequential matrix generated
3391 
3392     Level: developer
3393 
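   Example usage (a sketch only; A is assumed to be an assembled MPIAIJ matrix; passing
   PETSC_NULL for row and col selects all local rows and all nonzero columns):
.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve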
3394 @*/
3395 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3396 {
3397   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3398   PetscErrorCode    ierr;
3399   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3400   IS                isrowa,iscola;
3401   Mat               *aloc;
3402 
3403   PetscFunctionBegin;
3404   if (!logkey_getlocalmatcondensed) {
3405     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3406   }
3407   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3408   if (!row){
3409     start = a->rstart; end = a->rend;
3410     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3411   } else {
3412     isrowa = *row;
3413   }
3414   if (!col){
3415     start = a->cstart;
3416     cmap  = a->garray;
3417     nzA   = a->A->n;
3418     nzB   = a->B->n;
3419     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3420     ncols = 0;
3421     for (i=0; i<nzB; i++) {
3422       if (cmap[i] < start) idx[ncols++] = cmap[i];
3423       else break;
3424     }
3425     imark = i;
3426     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3427     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3428     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3429     ierr = PetscFree(idx);CHKERRQ(ierr);
3430   } else {
3431     iscola = *col;
3432   }
3433   if (scall != MAT_INITIAL_MATRIX){
3434     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3435     aloc[0] = *A_loc;
3436   }
3437   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3438   *A_loc = aloc[0];
3439   ierr = PetscFree(aloc);CHKERRQ(ierr);
3440   if (!row){
3441     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3442   }
3443   if (!col){
3444     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3445   }
3446   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3447   PetscFunctionReturn(0);
3448 }
3449 
3450 static PetscEvent logkey_GetBrowsOfAcols = 0;
3451 #undef __FUNCT__
3452 #define __FUNCT__ "MatGetBrowsOfAcols"
3453 /*@C
3454     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3455 
3456     Collective on Mat
3457 
3458    Input Parameters:
3459 +    A,B - the matrices in mpiaij format
3460 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3461 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3462 
3463    Output Parameter:
3464 +    rowb, colb - index sets of rows and columns of B to extract
3465 .    brstart - row index of B_seq at which B's own local rows begin
3466 -    B_seq - the sequential matrix generated
3467 
3468     Level: developer
3469 
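   Example usage (a sketch only; A and B are assumed to be MPIAIJ matrices whose local
   column/row layouts are compatible, as when forming a product of A and B):
.vb
     Mat      B_seq;
     IS       rowb,colb;
     PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     /* ... later, refresh the numerical values of B_seq reusing the same index sets ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve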
3470 @*/
3471 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3472 {
3473   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3474   PetscErrorCode    ierr;
3475   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3476   IS                isrowb,iscolb;
3477   Mat               *bseq;
3478 
3479   PetscFunctionBegin;
3480   if (a->cstart != b->rstart || a->cend != b->rend){
3481     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3482   }
3483   if (!logkey_GetBrowsOfAcols) {
3484     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3485   }
3486   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3487 
3488   if (scall == MAT_INITIAL_MATRIX){
3489     start = a->cstart;
3490     cmap  = a->garray;
3491     nzA   = a->A->n;
3492     nzB   = a->B->n;
3493     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3494     ncols = 0;
3495     for (i=0; i<nzB; i++) {  /* row < local row index */
3496       if (cmap[i] < start) idx[ncols++] = cmap[i];
3497       else break;
3498     }
3499     imark = i;
3500     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3501     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3502     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3503     ierr = PetscFree(idx);CHKERRQ(ierr);
3504     *brstart = imark;
3505     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3506   } else {
3507     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3508     isrowb = *rowb; iscolb = *colb;
3509     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3510     bseq[0] = *B_seq;
3511   }
3512   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3513   *B_seq = bseq[0];
3514   ierr = PetscFree(bseq);CHKERRQ(ierr);
3515   if (!rowb){
3516     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3517   } else {
3518     *rowb = isrowb;
3519   }
3520   if (!colb){
3521     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3522   } else {
3523     *colb = iscolb;
3524   }
3525   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3526   PetscFunctionReturn(0);
3527 }
3528 
3529 static PetscEvent logkey_GetBrowsOfAocols = 0;
3530 #undef __FUNCT__
3531 #define __FUNCT__ "MatGetBrowsOfAoCols"
3532 /*@C
3533     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3534     of the OFF-DIAGONAL portion of local A
3535 
3536     Collective on Mat
3537 
3538    Input Parameters:
3539 +    A,B - the matrices in mpiaij format
3540 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3541 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3542 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3543 
3544    Output Parameter:
3545 .    B_oth - the sequential matrix generated
3546 
3547     Level: developer
3548 
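   Example usage (a sketch only; A and B are assumed to be MPIAIJ matrices with
   compatible layouts; startsj and bufa are kept so the values can be refreshed later):
.vb
     Mat         B_oth;
     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     /* ... later, update only the numerical values of B_oth ... */
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
.ve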
3549 @*/
3550 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3551 {
3552   VecScatter_MPI_General *gen_to,*gen_from;
3553   PetscErrorCode         ierr;
3554   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3555   Mat_SeqAIJ             *b_oth;
3556   VecScatter             ctx=a->Mvctx;
3557   MPI_Comm               comm=ctx->comm;
3558   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3559   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3560   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3561   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3562   MPI_Request            *rwaits,*swaits;
3563   MPI_Status             *sstatus,rstatus;
3564   PetscInt               *cols;
3565   PetscScalar            *vals;
3566   PetscMPIInt            j;
3567 
3568   PetscFunctionBegin;
3569   if (a->cstart != b->rstart || a->cend != b->rend){
3570     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3571   }
3572   if (!logkey_GetBrowsOfAocols) {
3573     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3574   }
3575   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3576   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3577 
3578   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3579   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3580   rvalues  = gen_from->values; /* receive buffer: will hold the lengths of the incoming rows */
3581   svalues  = gen_to->values;   /* send buffer: holds the lengths of the rows sent to other processes */
3582   nrecvs   = gen_from->n;
3583   nsends   = gen_to->n;
3584   rwaits   = gen_from->requests;
3585   swaits   = gen_to->requests;
3586   srow     = gen_to->indices;   /* local row index to be sent */
3587   rstarts  = gen_from->starts;
3588   sstarts  = gen_to->starts;
3589   rprocs   = gen_from->procs;
3590   sprocs   = gen_to->procs;
3591   sstatus  = gen_to->sstatus;
3592 
3593   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3594   if (scall == MAT_INITIAL_MATRIX){
3595     /* i-array */
3596     /*---------*/
3597     /*  post receives */
3598     for (i=0; i<nrecvs; i++){
3599       rowlen = (PetscInt*)rvalues + rstarts[i];
3600       nrows = rstarts[i+1]-rstarts[i];
3601       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3602     }
3603 
3604     /* pack the outgoing message */
3605     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3606     rstartsj = sstartsj + nsends +1;
3607     sstartsj[0] = 0;  rstartsj[0] = 0;
3608     len = 0; /* total length of j or a array to be sent */
3609     k = 0;
3610     for (i=0; i<nsends; i++){
3611       rowlen = (PetscInt*)svalues + sstarts[i];
3612       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3613       for (j=0; j<nrows; j++) {
3614         row = srow[k] + b->rowners[rank]; /* global row idx */
3615         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3616         len += rowlen[j];
3617         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3618         k++;
3619       }
3620       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3621       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3622     }
3623     /* recvs and sends of i-array are completed */
3624     i = nrecvs;
3625     while (i--) {
3626       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3627     }
3628     if (nsends) {
3629       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3630     }
3631     /* allocate buffers for sending j and a arrays */
3632     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3633     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3634 
3635     /* create i-array of B_oth */
3636     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3637     b_othi[0] = 0;
3638     len = 0; /* total length of j or a array to be received */
3639     k = 0;
3640     for (i=0; i<nrecvs; i++){
3641       rowlen = (PetscInt*)rvalues + rstarts[i];
3642       nrows = rstarts[i+1]-rstarts[i];
3643       for (j=0; j<nrows; j++) {
3644         b_othi[k+1] = b_othi[k] + rowlen[j];
3645         len += rowlen[j]; k++;
3646       }
3647       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3648     }
3649 
3650     /* allocate space for the j and a arrays of B_oth */
3651     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3652     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3653 
3654     /* j-array */
3655     /*---------*/
3656     /*  post receives of j-array */
3657     for (i=0; i<nrecvs; i++){
3658       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3659       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3660     }
3661     k = 0;
3662     for (i=0; i<nsends; i++){
3663       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3664       bufJ = bufj+sstartsj[i];
3665       for (j=0; j<nrows; j++) {
3666         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3667         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3668         for (l=0; l<ncols; l++){
3669           *bufJ++ = cols[l];
3670         }
3671         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3672       }
3673       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3674     }
3675 
3676     /* recvs and sends of j-array are completed */
3677     i = nrecvs;
3678     while (i--) {
3679       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3680     }
3681     if (nsends) {
3682       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3683     }
3684   } else if (scall == MAT_REUSE_MATRIX){
3685     sstartsj = *startsj;
3686     rstartsj = sstartsj + nsends +1;
3687     bufa     = *bufa_ptr;
3688     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3689     b_otha   = b_oth->a;
3690   } else {
3691     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3692   }
3693 
3694   /* a-array */
3695   /*---------*/
3696   /*  post receives of a-array */
3697   for (i=0; i<nrecvs; i++){
3698     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3699     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3700   }
3701   k = 0;
3702   for (i=0; i<nsends; i++){
3703     nrows = sstarts[i+1]-sstarts[i];
3704     bufA = bufa+sstartsj[i];
3705     for (j=0; j<nrows; j++) {
3706       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3707       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3708       for (l=0; l<ncols; l++){
3709         *bufA++ = vals[l];
3710       }
3711       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3712 
3713     }
3714     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3715   }
3716   /* recvs and sends of a-array are completed */
3717   i = nrecvs;
3718   while (i--) {
3719     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3720   }
3721   if (nsends) {
3722     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3723   }
3724 
3725   if (scall == MAT_INITIAL_MATRIX){
3726     /* put together the new matrix */
3727     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3728 
3729     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3730     /* Since these are PETSc arrays, change flags to free them as necessary. */
3731     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3732     b_oth->freedata = PETSC_TRUE;
3733     b_oth->nonew    = 0;
3734 
3735     ierr = PetscFree(bufj);CHKERRQ(ierr);
3736     if (!startsj || !bufa_ptr){
3737       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3738       ierr = PetscFree(bufa);CHKERRQ(ierr);
3739     } else {
3740       *startsj  = sstartsj;
3741       *bufa_ptr = bufa;
3742     }
3743   }
3744   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3745 
3746   PetscFunctionReturn(0);
3747 }
3748 
3749 /*MC
3750    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3751 
3752    Options Database Keys:
3753 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3754 
3755   Level: beginner
3756 
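   Example usage (a sketch only; m,n,M,N,d_nz,o_nz are placeholders chosen by the caller):
.vb
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);  /* or -mat_type mpiaij with MatSetFromOptions() */
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
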
3757 .seealso: MatCreateMPIAIJ
3758 M*/
3759 
3760 EXTERN_C_BEGIN
3761 #undef __FUNCT__
3762 #define __FUNCT__ "MatCreate_MPIAIJ"
3763 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3764 {
3765   Mat_MPIAIJ     *b;
3766   PetscErrorCode ierr;
3767   PetscInt       i;
3768   PetscMPIInt    size;
3769 
3770   PetscFunctionBegin;
3771   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3772 
3773   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3774   B->data         = (void*)b;
3775   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3776   B->factor       = 0;
3777   B->bs           = 1;
3778   B->assembled    = PETSC_FALSE;
3779   B->mapping      = 0;
3780 
3781   B->insertmode      = NOT_SET_VALUES;
3782   b->size            = size;
3783   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3784 
3785   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3786   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3787 
3788   /* the information in the maps duplicates the information computed below, eventually
3789      we should remove the duplicate information that is not contained in the maps */
3790   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3791   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3792 
3793   /* build local table of row and column ownerships */
3794   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3795   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3796   b->cowners = b->rowners + b->size + 2;
3797   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3798   b->rowners[0] = 0;
3799   for (i=2; i<=b->size; i++) {
3800     b->rowners[i] += b->rowners[i-1];
3801   }
3802   b->rstart = b->rowners[b->rank];
3803   b->rend   = b->rowners[b->rank+1];
3804   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3805   b->cowners[0] = 0;
3806   for (i=2; i<=b->size; i++) {
3807     b->cowners[i] += b->cowners[i-1];
3808   }
3809   b->cstart = b->cowners[b->rank];
3810   b->cend   = b->cowners[b->rank+1];
3811 
3812   /* build cache for off array entries formed */
3813   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3814   b->donotstash  = PETSC_FALSE;
3815   b->colmap      = 0;
3816   b->garray      = 0;
3817   b->roworiented = PETSC_TRUE;
3818 
3819   /* stuff used for matrix vector multiply */
3820   b->lvec      = PETSC_NULL;
3821   b->Mvctx     = PETSC_NULL;
3822 
3823   /* stuff for MatGetRow() */
3824   b->rowindices   = 0;
3825   b->rowvalues    = 0;
3826   b->getrowactive = PETSC_FALSE;
3827 
3828   /* Explicitly create 2 MATSEQAIJ matrices. */
3829   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3830   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3831   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3832   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3833   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3834   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3835 
3836   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3837                                      "MatStoreValues_MPIAIJ",
3838                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3839   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3840                                      "MatRetrieveValues_MPIAIJ",
3841                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3842   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3843 				     "MatGetDiagonalBlock_MPIAIJ",
3844                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3845   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3846 				     "MatIsTranspose_MPIAIJ",
3847 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3848   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3849 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3850 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3851   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3852 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3853 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3854   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3855 				     "MatDiagonalScaleLocal_MPIAIJ",
3856 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3857   PetscFunctionReturn(0);
3858 }
3859 EXTERN_C_END
3860