xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 770ba9b70756f805e2f35c818f540b6c3dfb3c2a)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9 a slightly higher hash table lookup cost; without it, it is not scalable (each process
10 has an order-N integer array) but access is fast.
11 */
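/*
   Note on the +1 convention used below: the table (or array) stores local index+1 rather
   than the local index itself, so that a lookup result of 0 means "column not present";
   callers subtract 1 after the lookup.  With PETSC_USE_CTABLE the key is likewise stored
   as global column+1.
*/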
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
35 #define CHUNKSIZE   15
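/*
   MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() below are hand-inlined
   versions of MatSetValues_SeqAIJ() for a single (row,col,value) entry, operating on the
   diagonal block (A) and the off-diagonal block (B) respectively.  They locate the column
   with a binary search narrowed to a window of at most 5 entries followed by a linear scan,
   then either add to / overwrite the existing entry or insert a new one.  If the row has no
   free slot, the block's storage is reallocated with CHUNKSIZE extra entries granted to that
   row; the nonew flag (set via MatSetOption()) controls whether a new nonzero is silently
   dropped (1), is an error (-1), or is an error only when it would force a reallocation (-2).
*/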
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                    ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
58       if (nonew == 1) goto a_noinsert; \
59       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
60       if (nrow >= rmax) { \
61         /* there is no extra room in row, therefore enlarge */ \
62         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
63         PetscScalar *new_a; \
64  \
65         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
66  \
67         /* malloc new storage space */ \
68         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
69         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
70         new_j   = (PetscInt*)(new_a + new_nz); \
71         new_i   = new_j + new_nz; \
72  \
73         /* copy over old data into new slots */ \
74         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
75         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
76         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
77         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
78         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
79                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
81         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
82                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
83         /* free up old matrix storage */ \
84  \
85         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
86         if (!a->singlemalloc) { \
87            ierr = PetscFree(a->i);CHKERRQ(ierr); \
88            ierr = PetscFree(a->j);CHKERRQ(ierr); \
89         } \
90         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
91         a->singlemalloc = PETSC_TRUE; \
92  \
93         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
94         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
95         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
96         a->maxnz += CHUNKSIZE; \
97         a->reallocs++; \
98       } \
99       N = nrow++ - 1; a->nz++; \
100       /* shift up all the later entries in this row */ \
101       for (ii=N; ii>=_i; ii--) { \
102         rp[ii+1] = rp[ii]; \
103         ap[ii+1] = ap[ii]; \
104       } \
105       rp[_i] = col1;  \
106       ap[_i] = value;  \
107       a_noinsert: ; \
108       ailen[row] = nrow; \
109 }
110 
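/*
   Same insertion logic as MatSetValues_SeqAIJ_A_Private() above, applied to the off-diagonal
   block B.  The column index is taken in whatever numbering the caller supplies: global
   column numbers before the first assembly, the compacted local numbering (via colmap and
   garray) afterwards.
*/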
111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112 { \
113  \
114     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115     rmax = bimax[row]; nrow = bilen[row];  \
116     col1 = col - shift; \
117      \
118     low = 0; high = nrow; \
119     while (high-low > 5) { \
120       t = (low+high)/2; \
121       if (rp[t] > col) high = t; \
122       else             low  = t; \
123     } \
124        for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > col1) break; \
126         if (rp[_i] == col1) { \
127           if (addv == ADD_VALUES) ap[_i] += value;   \
128           else                    ap[_i] = value; \
129           goto b_noinsert; \
130         } \
131       }  \
132       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
133       if (nonew == 1) goto b_noinsert; \
134       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
135       if (nrow >= rmax) { \
136         /* there is no extra room in row, therefore enlarge */ \
137         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
138         PetscScalar *new_a; \
139  \
140         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
141  \
142         /* malloc new storage space */ \
143         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
144         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
145         new_j   = (PetscInt*)(new_a + new_nz); \
146         new_i   = new_j + new_nz; \
147  \
148         /* copy over old data into new slots */ \
149         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
150         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
151         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
152         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
153         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
154                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
155         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
156         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
157                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
158         /* free up old matrix storage */ \
159  \
160         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
161         if (!b->singlemalloc) { \
162           ierr = PetscFree(b->i);CHKERRQ(ierr); \
163           ierr = PetscFree(b->j);CHKERRQ(ierr); \
164         } \
165         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
166         b->singlemalloc = PETSC_TRUE; \
167  \
168         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
169         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
170         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
171         b->maxnz += CHUNKSIZE; \
172         b->reallocs++; \
173       } \
174       N = nrow++ - 1; b->nz++; \
175       /* shift up all the later entries in this row */ \
176       for (ii=N; ii>=_i; ii--) { \
177         rp[ii+1] = rp[ii]; \
178         ap[ii+1] = ap[ii]; \
179       } \
180       rp[_i] = col1;  \
181       ap[_i] = value;  \
182       b_noinsert: ; \
183       bilen[row] = nrow; \
184 }
185 
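/*
   MatSetValues_MPIAIJ: entries whose row is owned by this process are inserted directly;
   columns in [cstart,cend) go into the diagonal block aij->A (as in[j]-cstart), all other
   columns go into the off-diagonal block aij->B (translated through colmap once the matrix
   has been assembled; if a genuinely new off-diagonal column appears after assembly and new
   nonzeros are allowed, DisAssemble_MPIAIJ() switches B back to global column numbering).
   Entries for rows owned by other processes are placed in the stash and communicated during
   assembly.

   A minimal user-level usage sketch (standard PETSc calls, not specific to this file):

      ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/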
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIAIJ"
188 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
191   PetscScalar    value;
192   PetscErrorCode ierr;
193   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
194   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
195   PetscTruth     roworiented = aij->roworiented;
196 
197   /* Variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
198   Mat            A = aij->A;
199   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
200   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
201   PetscScalar    *aa = a->a;
202   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
203   Mat            B = aij->B;
204   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
205   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
206   PetscScalar    *ba = b->a;
207 
208   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
209   PetscInt       nonew = a->nonew,shift=0;
210   PetscScalar    *ap;
211 
212   PetscFunctionBegin;
213   for (i=0; i<m; i++) {
214     if (im[i] < 0) continue;
215 #if defined(PETSC_USE_DEBUG)
216     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
217 #endif
218     if (im[i] >= rstart && im[i] < rend) {
219       row = im[i] - rstart;
220       for (j=0; j<n; j++) {
221         if (in[j] >= cstart && in[j] < cend){
222           col = in[j] - cstart;
223           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
224           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
225           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
226           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
227         } else if (in[j] < 0) continue;
228 #if defined(PETSC_USE_DEBUG)
229         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
230 #endif
231         else {
232           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
233           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
234           if (mat->was_assembled) {
235             if (!aij->colmap) {
236               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined (PETSC_USE_CTABLE)
239             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
240 	    col--;
241 #else
242             col = aij->colmap[in[j]] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
245               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
246               col =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
248               B = aij->B;
249               b = (Mat_SeqAIJ*)B->data;
250               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
251               ba = b->a;
252             }
253           } else col = in[j];
254           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (!aij->donotstash) {
260         if (roworiented) {
261           if (ignorezeroentries && v[i*n] == 0.0) continue;
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
263         } else {
264           if (ignorezeroentries && v[i] == 0.0) continue;
265           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
266         }
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatGetValues_MPIAIJ"
275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
276 {
277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
278   PetscErrorCode ierr;
279   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
280   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
281 
282   PetscFunctionBegin;
283   for (i=0; i<m; i++) {
284     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
285     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
286     if (idxm[i] >= rstart && idxm[i] < rend) {
287       row = idxm[i] - rstart;
288       for (j=0; j<n; j++) {
289         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
290         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
291         if (idxn[j] >= cstart && idxn[j] < cend){
292           col = idxn[j] - cstart;
293           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
294         } else {
295           if (!aij->colmap) {
296             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
297           }
298 #if defined (PETSC_USE_CTABLE)
299           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
300           col --;
301 #else
302           col = aij->colmap[idxn[j]] - 1;
303 #endif
304           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
305           else {
306             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
307           }
308         }
309       }
310     } else {
311       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
312     }
313   }
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
320 {
321   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
322   PetscErrorCode ierr;
323   PetscInt       nstash,reallocs;
324   InsertMode     addv;
325 
326   PetscFunctionBegin;
327   if (aij->donotstash) {
328     PetscFunctionReturn(0);
329   }
330 
331   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES, not a mixture */
332   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
333   if (addv == (ADD_VALUES|INSERT_VALUES)) {
334     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
335   }
336   mat->insertmode = addv; /* in case this processor had no cache */
337 
338   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
339   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
340   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
341   PetscFunctionReturn(0);
342 }
343 
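/*
   MatAssemblyEnd_MPIAIJ drains the stash: each received message is a batch of
   (row,col,value) triples that is applied with MatSetValues_MPIAIJ(), one row at a time.
   The diagonal block is then assembled; an Allreduce decides whether any process
   disassembled its off-diagonal block (in which case all must, so they reassemble
   consistently); on the first MAT_FINAL_ASSEMBLY the communication scatter is built by
   MatSetUpMultiply_MPIAIJ(); finally the off-diagonal block is assembled with
   compressed-row storage enabled and inode use disabled.
*/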
344 #undef __FUNCT__
345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
347 {
348   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
349   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
350   PetscErrorCode ierr;
351   PetscMPIInt    n;
352   PetscInt       i,j,rstart,ncols,flg;
353   PetscInt       *row,*col,other_disassembled;
354   PetscScalar    *val;
355   InsertMode     addv = mat->insertmode;
356 
357   PetscFunctionBegin;
358   if (!aij->donotstash) {
359     while (1) {
360       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
361       if (!flg) break;
362 
363       for (i=0; i<n;) {
364         /* Now identify the consecutive vals belonging to the same row */
365         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
366         if (j < n) ncols = j-i;
367         else       ncols = n-i;
368         /* Now assemble all these values with a single function call */
369         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
370         i = j;
371       }
372     }
373     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
374   }
375   a->compressedrow.use     = PETSC_FALSE;
376   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
377   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
378 
379   /* determine if any processor has disassembled; if so we must
380      also disassemble ourselves, in order that we may reassemble. */
381   /*
382      if the nonzero structure of submatrix B cannot change then we know that
383      no processor disassembled, thus we can skip this check
384   */
385   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
386     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
387     if (mat->was_assembled && !other_disassembled) {
388       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
389       /* re-fetch b because aij->B was replaced by DisAssemble_MPIAIJ() */
390       b    = (Mat_SeqAIJ *)aij->B->data;
391     }
392   }
393 
394   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
395     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
396   }
397 
398   b->inode.use         = PETSC_FALSE;
399   b->compressedrow.use = PETSC_TRUE;
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 
408   /* used by MatAXPY() */
409   a->xtoy = 0; b->xtoy = 0;
410   a->XtoY = 0; b->XtoY = 0;
411 
412   PetscFunctionReturn(0);
413 }
414 
415 #undef __FUNCT__
416 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
417 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
418 {
419   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
420   PetscErrorCode ierr;
421 
422   PetscFunctionBegin;
423   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
424   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
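/*
   MatZeroRows_MPIAIJ: each global row index in is is routed to the process that owns it;
   contributions per destination are counted, message counts and lengths are exchanged with
   PetscMaxSum(), and the indices are delivered with nonblocking MPI sends/receives.  The
   received indices are converted to local numbering and the corresponding rows of both local
   blocks are zeroed; if a diagonal value is requested it is either set by MatZeroRows() on
   the (square) diagonal block or inserted row by row with MatSetValues().
*/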
428 #undef __FUNCT__
429 #define __FUNCT__ "MatZeroRows_MPIAIJ"
430 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
431 {
432   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
433   PetscErrorCode ierr;
434   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
435   PetscInt       i,N,*rows,*owners = l->rowners;
436   PetscInt       *nprocs,j,idx,nsends,row;
437   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
438   PetscInt       *rvalues,count,base,slen,*source;
439   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
440   MPI_Comm       comm = A->comm;
441   MPI_Request    *send_waits,*recv_waits;
442   MPI_Status     recv_status,*send_status;
443   IS             istmp;
444   PetscTruth     found;
445 
446   PetscFunctionBegin;
447   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
448   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
449 
450   /*  first count number of contributors to each processor */
451   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
452   ierr   = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
453   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
454   for (i=0; i<N; i++) {
455     idx = rows[i];
456     found = PETSC_FALSE;
457     for (j=0; j<size; j++) {
458       if (idx >= owners[j] && idx < owners[j+1]) {
459         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
460       }
461     }
462     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
463   }
464   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
465 
466   /* inform other processors of number of messages and max length*/
467   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
468 
469   /* post receives:   */
470   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
471   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
472   for (i=0; i<nrecvs; i++) {
473     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
474   }
475 
476   /* do sends:
477       1) starts[i] gives the starting index in svalues for stuff going to
478          the ith processor
479   */
480   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
481   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
482   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
483   starts[0] = 0;
484   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
485   for (i=0; i<N; i++) {
486     svalues[starts[owner[i]]++] = rows[i];
487   }
488   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
489 
490   starts[0] = 0;
491   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
492   count = 0;
493   for (i=0; i<size; i++) {
494     if (nprocs[2*i+1]) {
495       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
496     }
497   }
498   ierr = PetscFree(starts);CHKERRQ(ierr);
499 
500   base = owners[rank];
501 
502   /*  wait on receives */
503   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
504   source = lens + nrecvs;
505   count  = nrecvs; slen = 0;
506   while (count) {
507     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
508     /* unpack receives into our local space */
509     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
510     source[imdex]  = recv_status.MPI_SOURCE;
511     lens[imdex]    = n;
512     slen          += n;
513     count--;
514   }
515   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
516 
517   /* move the data into the send scatter */
518   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
519   count = 0;
520   for (i=0; i<nrecvs; i++) {
521     values = rvalues + i*nmax;
522     for (j=0; j<lens[i]; j++) {
523       lrows[count++] = values[j] - base;
524     }
525   }
526   ierr = PetscFree(rvalues);CHKERRQ(ierr);
527   ierr = PetscFree(lens);CHKERRQ(ierr);
528   ierr = PetscFree(owner);CHKERRQ(ierr);
529   ierr = PetscFree(nprocs);CHKERRQ(ierr);
530 
531   /* actually zap the local rows */
532   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
533   PetscLogObjectParent(A,istmp);
534 
535   /*
536         Zero the required rows. If the "diagonal block" of the matrix
537      is square and the user wishes to set the diagonal we use separate
538      code so that MatSetValues() is not called for each diagonal entry, which would
539      allocate new memory, causing many mallocs and slowing things down.
540 
541        Contributed by: Mathew Knepley
542   */
543   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
544   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
545   if (diag && (l->A->M == l->A->N)) {
546     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
547   } else if (diag) {
548     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
549     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
550       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
551 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
552     }
553     for (i = 0; i < slen; i++) {
554       row  = lrows[i] + rstart;
555       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
556     }
557     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
559   } else {
560     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
561   }
562   ierr = ISDestroy(istmp);CHKERRQ(ierr);
563   ierr = PetscFree(lrows);CHKERRQ(ierr);
564 
565   /* wait on sends */
566   if (nsends) {
567     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
568     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
569     ierr = PetscFree(send_status);CHKERRQ(ierr);
570   }
571   ierr = PetscFree(send_waits);CHKERRQ(ierr);
572   ierr = PetscFree(svalues);CHKERRQ(ierr);
573 
574   PetscFunctionReturn(0);
575 }
576 
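/*
   MatMult_MPIAIJ computes yy = A*xx as yy = A_diag*xx + B_off*lvec, where lvec holds the
   off-process entries of xx gathered by the Mvctx scatter.  The scatter is started before
   the local (diagonal block) multiply so that communication overlaps with computation.
*/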
577 #undef __FUNCT__
578 #define __FUNCT__ "MatMult_MPIAIJ"
579 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
580 {
581   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
582   PetscErrorCode ierr;
583   PetscInt       nt;
584 
585   PetscFunctionBegin;
586   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
587   if (nt != A->n) {
588     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
589   }
590   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
591   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
592   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatMultAdd_MPIAIJ"
599 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600 {
601   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
602   PetscErrorCode ierr;
603 
604   PetscFunctionBegin;
605   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
606   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
607   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
608   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
614 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
615 {
616   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
617   PetscErrorCode ierr;
618 
619   PetscFunctionBegin;
620   /* do nondiagonal part */
621   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
622   /* send it on its way */
623   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   /* do local part */
625   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
626   /* receive remote parts: note this assumes the values are not actually */
627   /* inserted in yy until the next line, which is true for my implementation */
628   /* but may not always be true. */
629   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
630   PetscFunctionReturn(0);
631 }
632 
633 EXTERN_C_BEGIN
634 #undef __FUNCT__
635 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
636 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
637 {
638   MPI_Comm       comm;
639   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
640   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
641   IS             Me,Notme;
642   PetscErrorCode ierr;
643   PetscInt       M,N,first,last,*notme,i;
644   PetscMPIInt    size;
645 
646   PetscFunctionBegin;
647 
648   /* Easy test: symmetric diagonal block */
649   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
650   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
651   if (!*f) PetscFunctionReturn(0);
652   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
653   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
654   if (size == 1) PetscFunctionReturn(0);
655 
656   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
657   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
658   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
659   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
660   for (i=0; i<first; i++) notme[i] = i;
661   for (i=last; i<M; i++) notme[i-last+first] = i;
662   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
663   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
664   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
665   Aoff = Aoffs[0];
666   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
667   Boff = Boffs[0];
668   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
669   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
670   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
671   ierr = ISDestroy(Me);CHKERRQ(ierr);
672   ierr = ISDestroy(Notme);CHKERRQ(ierr);
673 
674   PetscFunctionReturn(0);
675 }
676 EXTERN_C_END
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
680 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
681 {
682   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
683   PetscErrorCode ierr;
684 
685   PetscFunctionBegin;
686   /* do nondiagonal part */
687   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
688   /* send it on its way */
689   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
690   /* do local part */
691   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
692   /* receive remote parts: note this assumes the values are not actually */
693   /* inserted in zz until the next line, which is true for my implementation */
694   /* but may not always be true. */
695   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
696   PetscFunctionReturn(0);
697 }
698 
699 /*
700   This only works correctly for square matrices where the subblock A->A is the
701    diagonal block
702 */
703 #undef __FUNCT__
704 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
705 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
706 {
707   PetscErrorCode ierr;
708   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
709 
710   PetscFunctionBegin;
711   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
712   if (a->rstart != a->cstart || a->rend != a->cend) {
713     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
714   }
715   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 
719 #undef __FUNCT__
720 #define __FUNCT__ "MatScale_MPIAIJ"
721 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
722 {
723   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
724   PetscErrorCode ierr;
725 
726   PetscFunctionBegin;
727   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
728   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatDestroy_MPIAIJ"
734 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
735 {
736   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
737   PetscErrorCode ierr;
738 
739   PetscFunctionBegin;
740 #if defined(PETSC_USE_LOG)
741   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
742 #endif
743   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
744   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
745   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
746   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
747 #if defined (PETSC_USE_CTABLE)
748   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
749 #else
750   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
751 #endif
752   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
753   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
754   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
755   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
756   ierr = PetscFree(aij);CHKERRQ(ierr);
757 
758   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
759   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
760   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
761   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
762   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
763   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
764   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
765   PetscFunctionReturn(0);
766 }
767 
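/*
   MatView_MPIAIJ_Binary: process 0 writes the file: a header (MAT_FILE_COOKIE, M, N, global
   nonzero count), then the row lengths of every process, then all column indices in global
   numbering (for each local row: the off-diagonal columns that precede the diagonal block,
   the diagonal-block columns shifted by cstart, then the remaining off-diagonal columns, so
   each row stays sorted), and finally the numerical values in the same order.  The other
   processes send their pieces to process 0 one at a time.
*/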
768 #undef __FUNCT__
769 #define __FUNCT__ "MatView_MPIAIJ_Binary"
770 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
771 {
772   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
773   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
774   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
775   PetscErrorCode    ierr;
776   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
777   int               fd;
778   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
779   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
780   PetscScalar       *column_values;
781 
782   PetscFunctionBegin;
783   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
784   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
785   nz   = A->nz + B->nz;
786   if (!rank) {
787     header[0] = MAT_FILE_COOKIE;
788     header[1] = mat->M;
789     header[2] = mat->N;
790     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
791     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
792     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
793     /* get largest number of rows any processor has */
794     rlen = mat->m;
795     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
796     for (i=1; i<size; i++) {
797       rlen = PetscMax(rlen,range[i+1] - range[i]);
798     }
799   } else {
800     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
801     rlen = mat->m;
802   }
803 
804   /* load up the local row counts */
805   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
806   for (i=0; i<mat->m; i++) {
807     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
808   }
809 
810   /* store the row lengths to the file */
811   if (!rank) {
812     MPI_Status status;
813     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
814     for (i=1; i<size; i++) {
815       rlen = range[i+1] - range[i];
816       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
817       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
818     }
819   } else {
820     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
821   }
822   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
823 
824   /* load up the local column indices */
825   nzmax = nz; /* process 0 needs as much space as the largest amount any process needs */
826   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
827   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
828   cnt  = 0;
829   for (i=0; i<mat->m; i++) {
830     for (j=B->i[i]; j<B->i[i+1]; j++) {
831       if ( (col = garray[B->j[j]]) > cstart) break;
832       column_indices[cnt++] = col;
833     }
834     for (k=A->i[i]; k<A->i[i+1]; k++) {
835       column_indices[cnt++] = A->j[k] + cstart;
836     }
837     for (; j<B->i[i+1]; j++) {
838       column_indices[cnt++] = garray[B->j[j]];
839     }
840   }
841   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
842 
843   /* store the column indices to the file */
844   if (!rank) {
845     MPI_Status status;
846     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
847     for (i=1; i<size; i++) {
848       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
849       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
850       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
851       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
852     }
853   } else {
854     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
855     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
856   }
857   ierr = PetscFree(column_indices);CHKERRQ(ierr);
858 
859   /* load up the local column values */
860   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
861   cnt  = 0;
862   for (i=0; i<mat->m; i++) {
863     for (j=B->i[i]; j<B->i[i+1]; j++) {
864       if ( garray[B->j[j]] > cstart) break;
865       column_values[cnt++] = B->a[j];
866     }
867     for (k=A->i[i]; k<A->i[i+1]; k++) {
868       column_values[cnt++] = A->a[k];
869     }
870     for (; j<B->i[i+1]; j++) {
871       column_values[cnt++] = B->a[j];
872     }
873   }
874   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
875 
876   /* store the column values to the file */
877   if (!rank) {
878     MPI_Status status;
879     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
880     for (i=1; i<size; i++) {
881       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
882       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
883       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
884       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
885     }
886   } else {
887     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
888     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
889   }
890   ierr = PetscFree(column_values);CHKERRQ(ierr);
891   PetscFunctionReturn(0);
892 }
893 
894 #undef __FUNCT__
895 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
896 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
897 {
898   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
899   PetscErrorCode    ierr;
900   PetscMPIInt       rank = aij->rank,size = aij->size;
901   PetscTruth        isdraw,iascii,flg,isbinary;
902   PetscViewer       sviewer;
903   PetscViewerFormat format;
904 
905   PetscFunctionBegin;
906   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
907   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
908   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
909   if (iascii) {
910     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
911     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
912       MatInfo info;
913       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
914       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
915       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
916       if (flg) {
917         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
918 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
919       } else {
920         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
921 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
922       }
923       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
924       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
925       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
926       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
927       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
928       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
929       PetscFunctionReturn(0);
930     } else if (format == PETSC_VIEWER_ASCII_INFO) {
931       PetscFunctionReturn(0);
932     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
933       PetscFunctionReturn(0);
934     }
935   } else if (isbinary) {
936     if (size == 1) {
937       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
938       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
939     } else {
940       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
941     }
942     PetscFunctionReturn(0);
943   } else if (isdraw) {
944     PetscDraw  draw;
945     PetscTruth isnull;
946     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
947     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
948   }
949 
950   if (size == 1) {
951     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
952     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
953   } else {
954     /* assemble the entire matrix onto first processor. */
955     Mat         A;
956     Mat_SeqAIJ  *Aloc;
957     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
958     PetscScalar *a;
959 
960     if (!rank) {
961       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
962     } else {
963       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
964     }
965     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
966     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
967     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
968     PetscLogObjectParent(mat,A);
969 
970     /* copy over the A part */
971     Aloc = (Mat_SeqAIJ*)aij->A->data;
972     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
973     row = aij->rstart;
974     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
975     for (i=0; i<m; i++) {
976       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
977       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
978     }
979     aj = Aloc->j;
980     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
981 
982     /* copy over the B part */
983     Aloc = (Mat_SeqAIJ*)aij->B->data;
984     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
985     row  = aij->rstart;
986     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
987     ct   = cols;
988     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
989     for (i=0; i<m; i++) {
990       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
991       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
992     }
993     ierr = PetscFree(ct);CHKERRQ(ierr);
994     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
995     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
996     /*
997        Everyone has to participate in drawing the matrix since the graphics waits are
998        synchronized across all processors that share the PetscDraw object
999     */
1000     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1001     if (!rank) {
1002       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1003       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1004     }
1005     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1006     ierr = MatDestroy(A);CHKERRQ(ierr);
1007   }
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatView_MPIAIJ"
1013 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1014 {
1015   PetscErrorCode ierr;
1016   PetscTruth     iascii,isdraw,issocket,isbinary;
1017 
1018   PetscFunctionBegin;
1019   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1020   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1021   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1022   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1023   if (iascii || isdraw || isbinary || issocket) {
1024     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1025   } else {
1026     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1027   }
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 
1032 
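/*
   MatRelax_MPIAIJ supports only the SOR_LOCAL_* variants: each outer iteration gathers the
   current solution's off-process entries into lvec, negates them, forms bb1 = bb - B_off*x
   via MatMultAdd on the off-diagonal block, and then runs the requested SOR sweep of the
   sequential diagonal block on bb1.  This is block Jacobi between processes with SOR within
   each process.
*/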
1033 #undef __FUNCT__
1034 #define __FUNCT__ "MatRelax_MPIAIJ"
1035 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1036 {
1037   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1038   PetscErrorCode ierr;
1039   Vec            bb1;
1040   PetscScalar    mone=-1.0;
1041 
1042   PetscFunctionBegin;
1043   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1044 
1045   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1046 
1047   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1048     if (flag & SOR_ZERO_INITIAL_GUESS) {
1049       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1050       its--;
1051     }
1052 
1053     while (its--) {
1054       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1055       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1056 
1057       /* update rhs: bb1 = bb - B*x */
1058       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1059       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1060 
1061       /* local sweep */
1062       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1063       CHKERRQ(ierr);
1064     }
1065   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1066     if (flag & SOR_ZERO_INITIAL_GUESS) {
1067       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1068       its--;
1069     }
1070     while (its--) {
1071       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1072       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1073 
1074       /* update rhs: bb1 = bb - B*x */
1075       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1076       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1077 
1078       /* local sweep */
1079       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1080       CHKERRQ(ierr);
1081     }
1082   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1083     if (flag & SOR_ZERO_INITIAL_GUESS) {
1084       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1085       its--;
1086     }
1087     while (its--) {
1088       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1089       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1090 
1091       /* update rhs: bb1 = bb - B*x */
1092       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1093       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1094 
1095       /* local sweep */
1096       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1097       CHKERRQ(ierr);
1098     }
1099   } else {
1100     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1101   }
1102 
1103   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
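/*
   MatGetInfo_MPIAIJ sums the MAT_LOCAL statistics of the diagonal and off-diagonal blocks;
   for MAT_GLOBAL_MAX or MAT_GLOBAL_SUM the five counters are reduced across the communicator
   with MPI_MAX or MPI_SUM respectively.
*/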
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1109 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1110 {
1111   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1112   Mat            A = mat->A,B = mat->B;
1113   PetscErrorCode ierr;
1114   PetscReal      isend[5],irecv[5];
1115 
1116   PetscFunctionBegin;
1117   info->block_size     = 1.0;
1118   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1119   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1120   isend[3] = info->memory;  isend[4] = info->mallocs;
1121   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1122   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1123   isend[3] += info->memory;  isend[4] += info->mallocs;
1124   if (flag == MAT_LOCAL) {
1125     info->nz_used      = isend[0];
1126     info->nz_allocated = isend[1];
1127     info->nz_unneeded  = isend[2];
1128     info->memory       = isend[3];
1129     info->mallocs      = isend[4];
1130   } else if (flag == MAT_GLOBAL_MAX) {
1131     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1132     info->nz_used      = irecv[0];
1133     info->nz_allocated = irecv[1];
1134     info->nz_unneeded  = irecv[2];
1135     info->memory       = irecv[3];
1136     info->mallocs      = irecv[4];
1137   } else if (flag == MAT_GLOBAL_SUM) {
1138     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1139     info->nz_used      = irecv[0];
1140     info->nz_allocated = irecv[1];
1141     info->nz_unneeded  = irecv[2];
1142     info->memory       = irecv[3];
1143     info->mallocs      = irecv[4];
1144   }
1145   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1146   info->fill_ratio_needed = 0;
1147   info->factor_mallocs    = 0;
1148   info->rows_global       = (double)matin->M;
1149   info->columns_global    = (double)matin->N;
1150   info->rows_local        = (double)matin->m;
1151   info->columns_local     = (double)matin->N;
1152 
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatSetOption_MPIAIJ"
1158 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1159 {
1160   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1161   PetscErrorCode ierr;
1162 
1163   PetscFunctionBegin;
1164   switch (op) {
1165   case MAT_NO_NEW_NONZERO_LOCATIONS:
1166   case MAT_YES_NEW_NONZERO_LOCATIONS:
1167   case MAT_COLUMNS_UNSORTED:
1168   case MAT_COLUMNS_SORTED:
1169   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1170   case MAT_KEEP_ZEROED_ROWS:
1171   case MAT_NEW_NONZERO_LOCATION_ERR:
1172   case MAT_USE_INODES:
1173   case MAT_DO_NOT_USE_INODES:
1174   case MAT_IGNORE_ZERO_ENTRIES:
1175     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1176     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1177     break;
1178   case MAT_ROW_ORIENTED:
1179     a->roworiented = PETSC_TRUE;
1180     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1181     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1182     break;
1183   case MAT_ROWS_SORTED:
1184   case MAT_ROWS_UNSORTED:
1185   case MAT_YES_NEW_DIAGONALS:
1186     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1187     break;
1188   case MAT_COLUMN_ORIENTED:
1189     a->roworiented = PETSC_FALSE;
1190     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1191     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1192     break;
1193   case MAT_IGNORE_OFF_PROC_ENTRIES:
1194     a->donotstash = PETSC_TRUE;
1195     break;
1196   case MAT_NO_NEW_DIAGONALS:
1197     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1198   case MAT_SYMMETRIC:
1199   case MAT_STRUCTURALLY_SYMMETRIC:
1200   case MAT_HERMITIAN:
1201   case MAT_SYMMETRY_ETERNAL:
1202     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1203     break;
1204   case MAT_NOT_SYMMETRIC:
1205   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1206   case MAT_NOT_HERMITIAN:
1207   case MAT_NOT_SYMMETRY_ETERNAL:
1208     break;
1209   default:
1210     SETERRQ(PETSC_ERR_SUP,"unknown option");
1211   }
1212   PetscFunctionReturn(0);
1213 }
1214 
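/*
   MatGetRow_MPIAIJ merges the requested (locally owned) row of the diagonal and off-diagonal
   blocks into the persistent rowvalues/rowindices buffers, which are sized once for the
   longest local row.  Column numbers are translated to global numbering (cstart offset for
   the diagonal block, garray[] for the off-diagonal block) and the merged row is kept in
   increasing column order.
*/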
1215 #undef __FUNCT__
1216 #define __FUNCT__ "MatGetRow_MPIAIJ"
1217 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1218 {
1219   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1220   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1221   PetscErrorCode ierr;
1222   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1223   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1224   PetscInt       *cmap,*idx_p;
1225 
1226   PetscFunctionBegin;
1227   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1228   mat->getrowactive = PETSC_TRUE;
1229 
1230   if (!mat->rowvalues && (idx || v)) {
1231     /*
1232         allocate enough space to hold information from the longest row.
1233     */
1234     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1235     PetscInt     max = 1,tmp;
1236     for (i=0; i<matin->m; i++) {
1237       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1238       if (max < tmp) { max = tmp; }
1239     }
1240     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1241     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1242   }
1243 
1244   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1245   lrow = row - rstart;
1246 
1247   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1248   if (!v)   {pvA = 0; pvB = 0;}
1249   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1250   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1251   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1252   nztot = nzA + nzB;
1253 
1254   cmap  = mat->garray;
1255   if (v  || idx) {
1256     if (nztot) {
1257       /* Sort by increasing column numbers, assuming A and B already sorted */
1258       PetscInt imark = -1;
1259       if (v) {
1260         *v = v_p = mat->rowvalues;
1261         for (i=0; i<nzB; i++) {
1262           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1263           else break;
1264         }
1265         imark = i;
1266         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1267         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1268       }
1269       if (idx) {
1270         *idx = idx_p = mat->rowindices;
1271         if (imark > -1) {
1272           for (i=0; i<imark; i++) {
1273             idx_p[i] = cmap[cworkB[i]];
1274           }
1275         } else {
1276           for (i=0; i<nzB; i++) {
1277             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1278             else break;
1279           }
1280           imark = i;
1281         }
1282         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1283         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1284       }
1285     } else {
1286       if (idx) *idx = 0;
1287       if (v)   *v   = 0;
1288     }
1289   }
1290   *nz = nztot;
1291   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1292   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1298 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1299 {
1300   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1301 
1302   PetscFunctionBegin;
1303   if (!aij->getrowactive) {
1304     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1305   }
1306   aij->getrowactive = PETSC_FALSE;
1307   PetscFunctionReturn(0);
1308 }
1309 
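/*
   MatNorm_MPIAIJ: the Frobenius norm sums the squares of all local entries and does an
   MPI_SUM reduction followed by a square root; the 1-norm accumulates |a_ij| into a
   length-N array of column sums, reduces it with MPI_SUM, and takes the maximum; the
   infinity norm computes local row sums (diagonal plus off-diagonal parts) and reduces
   the local maximum with MPI_MAX.
*/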
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatNorm_MPIAIJ"
1312 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1313 {
1314   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1315   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1316   PetscErrorCode ierr;
1317   PetscInt       i,j,cstart = aij->cstart;
1318   PetscReal      sum = 0.0;
1319   PetscScalar    *v;
1320 
1321   PetscFunctionBegin;
1322   if (aij->size == 1) {
1323     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1324   } else {
1325     if (type == NORM_FROBENIUS) {
1326       v = amat->a;
1327       for (i=0; i<amat->nz; i++) {
1328 #if defined(PETSC_USE_COMPLEX)
1329         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1330 #else
1331         sum += (*v)*(*v); v++;
1332 #endif
1333       }
1334       v = bmat->a;
1335       for (i=0; i<bmat->nz; i++) {
1336 #if defined(PETSC_USE_COMPLEX)
1337         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1338 #else
1339         sum += (*v)*(*v); v++;
1340 #endif
1341       }
1342       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1343       *norm = sqrt(*norm);
1344     } else if (type == NORM_1) { /* max column norm */
1345       PetscReal *tmp,*tmp2;
1346       PetscInt    *jj,*garray = aij->garray;
1347       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1348       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1349       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1350       *norm = 0.0;
1351       v = amat->a; jj = amat->j;
1352       for (j=0; j<amat->nz; j++) {
1353         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1354       }
1355       v = bmat->a; jj = bmat->j;
1356       for (j=0; j<bmat->nz; j++) {
1357         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1358       }
1359       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1360       for (j=0; j<mat->N; j++) {
1361         if (tmp2[j] > *norm) *norm = tmp2[j];
1362       }
1363       ierr = PetscFree(tmp);CHKERRQ(ierr);
1364       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1365     } else if (type == NORM_INFINITY) { /* max row norm */
1366       PetscReal ntemp = 0.0;
1367       for (j=0; j<aij->A->m; j++) {
1368         v = amat->a + amat->i[j];
1369         sum = 0.0;
1370         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1371           sum += PetscAbsScalar(*v); v++;
1372         }
1373         v = bmat->a + bmat->i[j];
1374         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1375           sum += PetscAbsScalar(*v); v++;
1376         }
1377         if (sum > ntemp) ntemp = sum;
1378       }
1379       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1380     } else {
1381       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1382     }
1383   }
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatTranspose_MPIAIJ"
1389 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1390 {
1391   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1392   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1393   PetscErrorCode ierr;
1394   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1395   Mat            B;
1396   PetscScalar    *array;
1397 
1398   PetscFunctionBegin;
1399   if (!matout && M != N) {
1400     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1401   }
1402 
1403   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1404   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1405   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1406 
1407   /* copy over the A part */
1408   Aloc = (Mat_SeqAIJ*)a->A->data;
1409   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1410   row = a->rstart;
1411   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1412   for (i=0; i<m; i++) {
1413     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1414     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1415   }
1416   aj = Aloc->j;
1417   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1418 
1419   /* copy over the B part */
1420   Aloc = (Mat_SeqAIJ*)a->B->data;
1421   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1422   row  = a->rstart;
1423   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1424   ct   = cols;
1425   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1426   for (i=0; i<m; i++) {
1427     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1428     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1429   }
1430   ierr = PetscFree(ct);CHKERRQ(ierr);
1431   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1432   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1433   if (matout) {
1434     *matout = B;
1435   } else {
1436     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1437   }
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1443 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1444 {
1445   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1446   Mat            a = aij->A,b = aij->B;
1447   PetscErrorCode ierr;
1448   PetscInt       s1,s2,s3;
1449 
1450   PetscFunctionBegin;
1451   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1452   if (rr) {
1453     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1454     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1455     /* Overlap communication with computation. */
1456     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1457   }
1458   if (ll) {
1459     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1460     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1461     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1462   }
1463   /* scale  the diagonal block */
1464   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1465 
1466   if (rr) {
1467     /* Do a scatter end and then right scale the off-diagonal block */
1468     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1469     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1470   }
1471 
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1478 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1479 {
1480   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1481   PetscErrorCode ierr;
1482 
1483   PetscFunctionBegin;
1484   if (!a->rank) {
1485     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1486   }
1487   PetscFunctionReturn(0);
1488 }
1489 
1490 #undef __FUNCT__
1491 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1492 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1493 {
1494   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1495   PetscErrorCode ierr;
1496 
1497   PetscFunctionBegin;
1498   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1499   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1500   PetscFunctionReturn(0);
1501 }
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1504 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1505 {
1506   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1507   PetscErrorCode ierr;
1508 
1509   PetscFunctionBegin;
1510   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatEqual_MPIAIJ"
1516 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1517 {
1518   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1519   Mat            a,b,c,d;
1520   PetscTruth     flg;
1521   PetscErrorCode ierr;
1522 
1523   PetscFunctionBegin;
1524   a = matA->A; b = matA->B;
1525   c = matB->A; d = matB->B;
1526 
1527   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1528   if (flg) {
1529     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1530   }
1531   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 #undef __FUNCT__
1536 #define __FUNCT__ "MatCopy_MPIAIJ"
1537 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1538 {
1539   PetscErrorCode ierr;
1540   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1541   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1542 
1543   PetscFunctionBegin;
1544   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1545   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1546     /* Because of the column compression in the off-processor part of the matrix a->B,
1547        the number of columns in a->B and b->B may differ, hence we cannot call
1548        MatCopy() directly on the two parts. If need be, we could provide a more
1549        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1550        and then copying the submatrices */
1551     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1552   } else {
1553     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1554     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1555   }
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1561 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1562 {
1563   PetscErrorCode ierr;
1564 
1565   PetscFunctionBegin;
1566   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 #include "petscblaslapack.h"
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatAXPY_MPIAIJ"
1573 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1574 {
1575   PetscErrorCode ierr;
1576   PetscInt       i;
1577   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1578   PetscBLASInt   bnz,one=1;
1579   Mat_SeqAIJ     *x,*y;
1580 
1581   PetscFunctionBegin;
1582   if (str == SAME_NONZERO_PATTERN) {
1583     x = (Mat_SeqAIJ *)xx->A->data;
1584     y = (Mat_SeqAIJ *)yy->A->data;
1585     bnz = (PetscBLASInt)x->nz;
1586     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1587     x = (Mat_SeqAIJ *)xx->B->data;
1588     y = (Mat_SeqAIJ *)yy->B->data;
1589     bnz = (PetscBLASInt)x->nz;
1590     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1591   } else if (str == SUBSET_NONZERO_PATTERN) {
1592     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1593 
1594     x = (Mat_SeqAIJ *)xx->B->data;
1595     y = (Mat_SeqAIJ *)yy->B->data;
1596     if (y->xtoy && y->XtoY != xx->B) {
1597       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1598       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1599     }
1600     if (!y->xtoy) { /* get xtoy */
1601       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1602       y->XtoY = xx->B;
1603     }
1604     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1605   } else {
1606     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1607   }
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 /* -------------------------------------------------------------------*/
1612 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1613        MatGetRow_MPIAIJ,
1614        MatRestoreRow_MPIAIJ,
1615        MatMult_MPIAIJ,
1616 /* 4*/ MatMultAdd_MPIAIJ,
1617        MatMultTranspose_MPIAIJ,
1618        MatMultTransposeAdd_MPIAIJ,
1619        0,
1620        0,
1621        0,
1622 /*10*/ 0,
1623        0,
1624        0,
1625        MatRelax_MPIAIJ,
1626        MatTranspose_MPIAIJ,
1627 /*15*/ MatGetInfo_MPIAIJ,
1628        MatEqual_MPIAIJ,
1629        MatGetDiagonal_MPIAIJ,
1630        MatDiagonalScale_MPIAIJ,
1631        MatNorm_MPIAIJ,
1632 /*20*/ MatAssemblyBegin_MPIAIJ,
1633        MatAssemblyEnd_MPIAIJ,
1634        0,
1635        MatSetOption_MPIAIJ,
1636        MatZeroEntries_MPIAIJ,
1637 /*25*/ MatZeroRows_MPIAIJ,
1638        0,
1639        0,
1640        0,
1641        0,
1642 /*30*/ MatSetUpPreallocation_MPIAIJ,
1643        0,
1644        0,
1645        0,
1646        0,
1647 /*35*/ MatDuplicate_MPIAIJ,
1648        0,
1649        0,
1650        0,
1651        0,
1652 /*40*/ MatAXPY_MPIAIJ,
1653        MatGetSubMatrices_MPIAIJ,
1654        MatIncreaseOverlap_MPIAIJ,
1655        MatGetValues_MPIAIJ,
1656        MatCopy_MPIAIJ,
1657 /*45*/ MatPrintHelp_MPIAIJ,
1658        MatScale_MPIAIJ,
1659        0,
1660        0,
1661        0,
1662 /*50*/ MatSetBlockSize_MPIAIJ,
1663        0,
1664        0,
1665        0,
1666        0,
1667 /*55*/ MatFDColoringCreate_MPIAIJ,
1668        0,
1669        MatSetUnfactored_MPIAIJ,
1670        0,
1671        0,
1672 /*60*/ MatGetSubMatrix_MPIAIJ,
1673        MatDestroy_MPIAIJ,
1674        MatView_MPIAIJ,
1675        MatGetPetscMaps_Petsc,
1676        0,
1677 /*65*/ 0,
1678        0,
1679        0,
1680        0,
1681        0,
1682 /*70*/ 0,
1683        0,
1684        MatSetColoring_MPIAIJ,
1685 #if defined(PETSC_HAVE_ADIC)
1686        MatSetValuesAdic_MPIAIJ,
1687 #else
1688        0,
1689 #endif
1690        MatSetValuesAdifor_MPIAIJ,
1691 /*75*/ 0,
1692        0,
1693        0,
1694        0,
1695        0,
1696 /*80*/ 0,
1697        0,
1698        0,
1699        0,
1700 /*84*/ MatLoad_MPIAIJ,
1701        0,
1702        0,
1703        0,
1704        0,
1705        0,
1706 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1707        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1708        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1709        MatPtAP_MPIAIJ_MPIAIJ,
1710        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1711 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1712        0,
1713        0,
1714        0};
1715 
1716 /* ----------------------------------------------------------------------------------------*/
1717 
1718 EXTERN_C_BEGIN
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1721 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1722 {
1723   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1724   PetscErrorCode ierr;
1725 
1726   PetscFunctionBegin;
1727   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1728   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1729   PetscFunctionReturn(0);
1730 }
1731 EXTERN_C_END
1732 
1733 EXTERN_C_BEGIN
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1736 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1737 {
1738   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1743   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1744   PetscFunctionReturn(0);
1745 }
1746 EXTERN_C_END
1747 
1748 #include "petscpc.h"
1749 EXTERN_C_BEGIN
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1752 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1753 {
1754   Mat_MPIAIJ     *b;
1755   PetscErrorCode ierr;
1756   PetscInt       i;
1757 
1758   PetscFunctionBegin;
1759   B->preallocated = PETSC_TRUE;
1760   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1761   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1762   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1763   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1764   if (d_nnz) {
1765     for (i=0; i<B->m; i++) {
1766       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1767     }
1768   }
1769   if (o_nnz) {
1770     for (i=0; i<B->m; i++) {
1771       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1772     }
1773   }
1774   b = (Mat_MPIAIJ*)B->data;
1775   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1776   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1777 
1778   PetscFunctionReturn(0);
1779 }
1780 EXTERN_C_END
1781 
1782 /*MC
1783    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1784 
1785    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1786    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1787    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
1788    for communicators controlling multiple processes.  It is recommended that you call both of
1789    the above preallocation routines for simplicity, as sketched below.
1790 
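   A minimal sketch of that recommended usage (the communicator comm and the sizes m,n,M,N are
   assumed to be supplied by the caller; error handling follows the usual CHKERRQ pattern):
.vb
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);              /* takes effect on a one process communicator */
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr); /* takes effect on a multi-process communicator */
.ve
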
1791    Options Database Keys:
1792 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1793 
1794   Level: beginner
1795 
1796 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1797 M*/
1798 
1799 EXTERN_C_BEGIN
1800 #undef __FUNCT__
1801 #define __FUNCT__ "MatCreate_AIJ"
1802 PetscErrorCode MatCreate_AIJ(Mat A)
1803 {
1804   PetscErrorCode ierr;
1805   PetscMPIInt    size;
1806 
1807   PetscFunctionBegin;
1808   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1809   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1810   if (size == 1) {
1811     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1812   } else {
1813     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1814   }
1815   PetscFunctionReturn(0);
1816 }
1817 EXTERN_C_END
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1821 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1822 {
1823   Mat            mat;
1824   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1825   PetscErrorCode ierr;
1826 
1827   PetscFunctionBegin;
1828   *newmat       = 0;
1829   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1830   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1831   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1832   a    = (Mat_MPIAIJ*)mat->data;
1833 
1834   mat->factor       = matin->factor;
1835   mat->bs           = matin->bs;
1836   mat->assembled    = PETSC_TRUE;
1837   mat->insertmode   = NOT_SET_VALUES;
1838   mat->preallocated = PETSC_TRUE;
1839 
1840   a->rstart       = oldmat->rstart;
1841   a->rend         = oldmat->rend;
1842   a->cstart       = oldmat->cstart;
1843   a->cend         = oldmat->cend;
1844   a->size         = oldmat->size;
1845   a->rank         = oldmat->rank;
1846   a->donotstash   = oldmat->donotstash;
1847   a->roworiented  = oldmat->roworiented;
1848   a->rowindices   = 0;
1849   a->rowvalues    = 0;
1850   a->getrowactive = PETSC_FALSE;
1851 
1852   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1853   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1854   if (oldmat->colmap) {
1855 #if defined (PETSC_USE_CTABLE)
1856     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1857 #else
1858     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1859     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1860     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1861 #endif
1862   } else a->colmap = 0;
1863   if (oldmat->garray) {
1864     PetscInt len;
1865     len  = oldmat->B->n;
1866     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1867     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1868     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1869   } else a->garray = 0;
1870 
1871   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1872   PetscLogObjectParent(mat,a->lvec);
1873   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1874   PetscLogObjectParent(mat,a->Mvctx);
1875   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1876   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1877   PetscLogObjectParent(mat,a->A);
1878   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1879   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1880   PetscLogObjectParent(mat,a->B);
1881   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1882   *newmat = mat;
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #include "petscsys.h"
1887 
1888 #undef __FUNCT__
1889 #define __FUNCT__ "MatLoad_MPIAIJ"
1890 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1891 {
1892   Mat            A;
1893   PetscScalar    *vals,*svals;
1894   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1895   MPI_Status     status;
1896   PetscErrorCode ierr;
1897   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1898   PetscInt       i,nz,j,rstart,rend;
1899   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1900   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1901   PetscInt       cend,cstart,n,*rowners;
1902   int            fd;
1903 
1904   PetscFunctionBegin;
1905   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1906   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1907   if (!rank) {
1908     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1909     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1910     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1911   }
1912 
1913   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1914   M = header[1]; N = header[2];
1915   /* determine ownership of all rows */
1916   m    = M/size + ((M % size) > rank);
1917   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1918   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1919   rowners[0] = 0;
1920   for (i=2; i<=size; i++) {
1921     rowners[i] += rowners[i-1];
1922   }
1923   rstart = rowners[rank];
1924   rend   = rowners[rank+1];
1925 
1926   /* distribute row lengths to all processors */
1927   ierr    = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
1928   if (!rank) {
1929     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1930     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1931     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1932     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1933     for (j=0; j<m; j++) {
1934       procsnz[0] += ourlens[j];
1935     }
1936     for (i=1; i<size; i++) {
1937       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1938       /* calculate the number of nonzeros on each processor */
1939       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1940         procsnz[i] += rowlengths[j];
1941       }
1942       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1943     }
1944     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1945   } else {
1946     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1947   }
1948 
1949   if (!rank) {
1950     /* determine max buffer needed and allocate it */
1951     maxnz = 0;
1952     for (i=0; i<size; i++) {
1953       maxnz = PetscMax(maxnz,procsnz[i]);
1954     }
1955     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1956 
1957     /* read in my part of the matrix column indices  */
1958     nz   = procsnz[0];
1959     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1960     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1961 
1962     /* read in everyone else's and ship them off */
1963     for (i=1; i<size; i++) {
1964       nz   = procsnz[i];
1965       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1966       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1967     }
1968     ierr = PetscFree(cols);CHKERRQ(ierr);
1969   } else {
1970     /* determine buffer space needed for message */
1971     nz = 0;
1972     for (i=0; i<m; i++) {
1973       nz += ourlens[i];
1974     }
1975     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1976 
1977     /* receive message of column indices */
1978     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1979     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1980     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1981   }
1982 
1983   /* determine column ownership if matrix is not square */
1984   if (N != M) {
1985     n      = N/size + ((N % size) > rank);
1986     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1987     cstart = cend - n;
1988   } else {
1989     cstart = rstart;
1990     cend   = rend;
1991     n      = cend - cstart;
1992   }
1993 
1994   /* loop over local rows, determining number of off diagonal entries */
1995   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1996   jj = 0;
1997   for (i=0; i<m; i++) {
1998     for (j=0; j<ourlens[i]; j++) {
1999       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2000       jj++;
2001     }
2002   }
2003 
2004   /* create our matrix */
2005   for (i=0; i<m; i++) {
2006     ourlens[i] -= offlens[i];
2007   }
2008   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2009   ierr = MatSetType(A,type);CHKERRQ(ierr);
2010   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2011 
2012   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2013   for (i=0; i<m; i++) {
2014     ourlens[i] += offlens[i];
2015   }
2016 
2017   if (!rank) {
2018     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2019 
2020     /* read in my part of the matrix numerical values  */
2021     nz   = procsnz[0];
2022     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2023 
2024     /* insert into matrix */
2025     jj      = rstart;
2026     smycols = mycols;
2027     svals   = vals;
2028     for (i=0; i<m; i++) {
2029       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2030       smycols += ourlens[i];
2031       svals   += ourlens[i];
2032       jj++;
2033     }
2034 
2035     /* read in other processors and ship out */
2036     for (i=1; i<size; i++) {
2037       nz   = procsnz[i];
2038       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2039       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2040     }
2041     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2042   } else {
2043     /* receive numeric values */
2044     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2045 
2046     /* receive message of values */
2047     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2048     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2049     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2050 
2051     /* insert into matrix */
2052     jj      = rstart;
2053     smycols = mycols;
2054     svals   = vals;
2055     for (i=0; i<m; i++) {
2056       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2057       smycols += ourlens[i];
2058       svals   += ourlens[i];
2059       jj++;
2060     }
2061   }
2062   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2063   ierr = PetscFree(vals);CHKERRQ(ierr);
2064   ierr = PetscFree(mycols);CHKERRQ(ierr);
2065   ierr = PetscFree(rowners);CHKERRQ(ierr);
2066 
2067   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2068   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   *newmat = A;
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 #undef __FUNCT__
2074 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2075 /*
2076     Not great since it makes two copies of the submatrix: first a local SeqAIJ matrix,
2077   and then the final result obtained by concatenating the local matrices.
2078   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2079 */
2080 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2081 {
2082   PetscErrorCode ierr;
2083   PetscMPIInt    rank,size;
2084   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2085   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2086   Mat            *local,M,Mreuse;
2087   PetscScalar    *vwork,*aa;
2088   MPI_Comm       comm = mat->comm;
2089   Mat_SeqAIJ     *aij;
2090 
2091 
2092   PetscFunctionBegin;
2093   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2094   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2095 
2096   if (call ==  MAT_REUSE_MATRIX) {
2097     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2098     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2099     local = &Mreuse;
2100     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2101   } else {
2102     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2103     Mreuse = *local;
2104     ierr   = PetscFree(local);CHKERRQ(ierr);
2105   }
2106 
2107   /*
2108       m - number of local rows
2109       n - number of columns (same on all processors)
2110       rstart - first row in new global matrix generated
2111   */
2112   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2113   if (call == MAT_INITIAL_MATRIX) {
2114     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2115     ii  = aij->i;
2116     jj  = aij->j;
2117 
2118     /*
2119         Determine the number of non-zeros in the diagonal and off-diagonal
2120         portions of the matrix in order to do correct preallocation
2121     */
2122 
2123     /* first get start and end of "diagonal" columns */
2124     if (csize == PETSC_DECIDE) {
2125       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2126       if (mglobal == n) { /* square matrix */
2127 	nlocal = m;
2128       } else {
2129         nlocal = n/size + ((n % size) > rank);
2130       }
2131     } else {
2132       nlocal = csize;
2133     }
2134     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2135     rstart = rend - nlocal;
2136     if (rank == size - 1 && rend != n) {
2137       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2138     }
2139 
2140     /* next, compute all the lengths */
2141     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2142     olens = dlens + m;
2143     for (i=0; i<m; i++) {
2144       jend = ii[i+1] - ii[i];
2145       olen = 0;
2146       dlen = 0;
2147       for (j=0; j<jend; j++) {
2148         if (*jj < rstart || *jj >= rend) olen++;
2149         else dlen++;
2150         jj++;
2151       }
2152       olens[i] = olen;
2153       dlens[i] = dlen;
2154     }
2155     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2156     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2157     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2158     ierr = PetscFree(dlens);CHKERRQ(ierr);
2159   } else {
2160     PetscInt ml,nl;
2161 
2162     M = *newmat;
2163     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2164     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2165     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2166     /*
2167          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2168        rather than the slower MatSetValues().
2169     */
2170     M->was_assembled = PETSC_TRUE;
2171     M->assembled     = PETSC_FALSE;
2172   }
2173   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2174   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2175   ii  = aij->i;
2176   jj  = aij->j;
2177   aa  = aij->a;
2178   for (i=0; i<m; i++) {
2179     row   = rstart + i;
2180     nz    = ii[i+1] - ii[i];
2181     cwork = jj;     jj += nz;
2182     vwork = aa;     aa += nz;
2183     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2184   }
2185 
2186   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2187   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2188   *newmat = M;
2189 
2190   /* save the submatrix used on this processor for the next request */
2191   if (call ==  MAT_INITIAL_MATRIX) {
2192     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2193     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2194   }
2195 
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 EXTERN_C_BEGIN
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2202 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2203 {
2204   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2205   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2206   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2207   const PetscInt *JJ;
2208   PetscScalar    *values;
2209   PetscErrorCode ierr;
2210 
2211   PetscFunctionBegin;
2212 #if defined(PETSC_OPT_g)
2213   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2214 #endif
2215   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2216   o_nnz = d_nnz + m;
2217 
2218   for (i=0; i<m; i++) {
2219     nnz     = I[i+1]- I[i];
2220     JJ      = J + I[i];
2221     nnz_max = PetscMax(nnz_max,nnz);
2222 #if defined(PETSC_OPT_g)
2223     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2224 #endif
2225     for (j=0; j<nnz; j++) {
2226       if (*JJ >= cstart) break;
2227       JJ++;
2228     }
2229     d = 0;
2230     for (; j<nnz; j++) {
2231       if (*JJ++ >= cend) break;
2232       d++;
2233     }
2234     d_nnz[i] = d;
2235     o_nnz[i] = nnz - d;
2236   }
2237   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2238   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2239 
2240   if (v) values = (PetscScalar*)v;
2241   else {
2242     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2243     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2244   }
2245 
2246   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2247   for (i=0; i<m; i++) {
2248     ii   = i + rstart;
2249     nnz  = I[i+1]- I[i];
2250     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2251   }
2252   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2253   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2254   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2255 
2256   if (!v) {
2257     ierr = PetscFree(values);CHKERRQ(ierr);
2258   }
2259   PetscFunctionReturn(0);
2260 }
2261 EXTERN_C_END
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2265 /*@C
2266    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2267    (the default parallel PETSc format).
2268 
2269    Collective on MPI_Comm
2270 
2271    Input Parameters:
2272 +  A - the matrix
2273 .  i - the indices into j for the start of each local row (starts with zero)
2274 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2275 -  v - optional values in the matrix
2276 
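   Example (an illustrative sketch only; A is assumed to already be an MPIAIJ matrix with two
   local rows and four global columns, and the numerical values are arbitrary):
.vb
     PetscInt    i[] = {0,2,4};             /* row r uses entries j[i[r]] .. j[i[r+1]-1]      */
     PetscInt    j[] = {0,1,1,3};           /* global column indices, sorted within each row */
     PetscScalar v[] = {1.0,2.0,3.0,4.0};
     ierr = MatMPIAIJSetPreallocationCSR(A,i,j,v);CHKERRQ(ierr);
.ve
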
2277    Level: developer
2278 
2279 .keywords: matrix, aij, compressed row, sparse, parallel
2280 
2281 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2282 @*/
2283 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2284 {
2285   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2286 
2287   PetscFunctionBegin;
2288   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2289   if (f) {
2290     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2291   }
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2297 /*@C
2298    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2299    (the default parallel PETSc format).  For good matrix assembly performance
2300    the user should preallocate the matrix storage by setting the parameters
2301    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2302    performance can be increased by more than a factor of 50.
2303 
2304    Collective on MPI_Comm
2305 
2306    Input Parameters:
2307 +  A - the matrix
2308 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2309            (same value is used for all local rows)
2310 .  d_nnz - array containing the number of nonzeros in the various rows of the
2311            DIAGONAL portion of the local submatrix (possibly different for each row)
2312            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2313            The size of this array is equal to the number of local rows, i.e 'm'.
2314            You must leave room for the diagonal entry even if it is zero.
2315 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2316            submatrix (same value is used for all local rows).
2317 -  o_nnz - array containing the number of nonzeros in the various rows of the
2318            OFF-DIAGONAL portion of the local submatrix (possibly different for
2319            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2320            structure. The size of this array is equal to the number
2321            of local rows, i.e 'm'.
2322 
2323    If the *_nnz parameter is given then the *_nz parameter is ignored
2324 
2325    The AIJ format (also called the Yale sparse matrix format or
2326    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2327    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2328 
2329    The parallel matrix is partitioned such that the first m0 rows belong to
2330    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2331    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2332 
2333    The DIAGONAL portion of the local submatrix of a processor can be defined
2334    as the submatrix which is obtained by extracting the part corresponding
2335    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2336    first row that belongs to the processor, and r2 is the last row belonging
2337    to this processor. This is a square mxm matrix. The remaining portion
2338    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2339 
2340    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2341 
2342    Example usage:
2343 
2344    Consider the following 8x8 matrix with 34 non-zero values, that is
2345    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2346    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2347    as follows:
2348 
2349 .vb
2350             1  2  0  |  0  3  0  |  0  4
2351     Proc0   0  5  6  |  7  0  0  |  8  0
2352             9  0 10  | 11  0  0  | 12  0
2353     -------------------------------------
2354            13  0 14  | 15 16 17  |  0  0
2355     Proc1   0 18  0  | 19 20 21  |  0  0
2356             0  0  0  | 22 23  0  | 24  0
2357     -------------------------------------
2358     Proc2  25 26 27  |  0  0 28  | 29  0
2359            30  0  0  | 31 32 33  |  0 34
2360 .ve
2361 
2362    This can be represented as a collection of submatrices as:
2363 
2364 .vb
2365       A B C
2366       D E F
2367       G H I
2368 .ve
2369 
2370    Where the submatrices A,B,C are owned by proc0, D,E,F are
2371    owned by proc1, G,H,I are owned by proc2.
2372 
2373    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2374    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2375    The 'M','N' parameters are 8,8, and have the same values on all procs.
2376 
2377    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2378    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2379    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2380    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2381    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2382    matrix, and [DF] as another SeqAIJ matrix.
2383 
2384    When d_nz, o_nz parameters are specified, d_nz storage elements are
2385    allocated for every row of the local diagonal submatrix, and o_nz
2386    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2387    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
2388    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2389    In this case, the values of d_nz,o_nz are:
2390 .vb
2391      proc0 : d_nz = 2, o_nz = 2
2392      proc1 : d_nz = 3, o_nz = 2
2393      proc2 : d_nz = 1, o_nz = 4
2394 .ve
2395    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2396    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2397    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2398    34 values.
2399 
2400    When d_nnz, o_nnz parameters are specified, the storage is specified
2401    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2402    In the above case the values for d_nnz,o_nnz are:
2403 .vb
2404      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2405      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2406      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2407 .ve
2408    Here the space allocated is the sum of all the above values, i.e. 34, and
2409    hence the preallocation is perfect.
2410 
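   For proc0 in the example above, the corresponding call would be (a sketch; A is assumed to have
   already been created with type MPIAIJ and 3 local rows):
.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
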
2411    Level: intermediate
2412 
2413 .keywords: matrix, aij, compressed row, sparse, parallel
2414 
2415 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2416           MPIAIJ
2417 @*/
2418 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2419 {
2420   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2421 
2422   PetscFunctionBegin;
2423   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2424   if (f) {
2425     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2426   }
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatCreateMPIAIJ"
2432 /*@C
2433    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2434    (the default parallel PETSc format).  For good matrix assembly performance
2435    the user should preallocate the matrix storage by setting the parameters
2436    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2437    performance can be increased by more than a factor of 50.
2438 
2439    Collective on MPI_Comm
2440 
2441    Input Parameters:
2442 +  comm - MPI communicator
2443 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2444            This value should be the same as the local size used in creating the
2445            y vector for the matrix-vector product y = Ax.
2446 .  n - This value should be the same as the local size used in creating the
2447        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2448        calculated if N is given) For square matrices n is almost always m.
2449 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2450 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2451 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2452            (same value is used for all local rows)
2453 .  d_nnz - array containing the number of nonzeros in the various rows of the
2454            DIAGONAL portion of the local submatrix (possibly different for each row)
2455            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2456            The size of this array is equal to the number of local rows, i.e 'm'.
2457            You must leave room for the diagonal entry even if it is zero.
2458 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2459            submatrix (same value is used for all local rows).
2460 -  o_nnz - array containing the number of nonzeros in the various rows of the
2461            OFF-DIAGONAL portion of the local submatrix (possibly different for
2462            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2463            structure. The size of this array is equal to the number
2464            of local rows, i.e 'm'.
2465 
2466    Output Parameter:
2467 .  A - the matrix
2468 
2469    Notes:
2470    If the *_nnz parameter is given then the *_nz parameter is ignored
2471 
2472    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2473    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2474    storage requirements for this matrix.
2475 
2476    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2477    processor then it must be used on all processors that share the object for
2478    that argument.
2479 
2480    The user MUST specify either the local or global matrix dimensions
2481    (possibly both).
2482 
2483    The parallel matrix is partitioned such that the first m0 rows belong to
2484    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2485    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2486 
2487    The DIAGONAL portion of the local submatrix of a processor can be defined
2488    as the submatrix which is obtained by extracting the part corresponding
2489    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2490    first row that belongs to the processor, and r2 is the last row belonging
2491    to this processor. This is a square mxm matrix. The remaining portion
2492    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2493 
2494    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2495 
2496    When calling this routine with a single process communicator, a matrix of
2497    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2498    type of communicator, use the construction mechanism:
2499      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2500 
2501    By default, this format uses inodes (identical nodes) when possible.
2502    We search for consecutive rows with the same nonzero structure, thereby
2503    reusing matrix information to achieve increased efficiency.
2504 
2505    Options Database Keys:
2506 +  -mat_aij_no_inode  - Do not use inodes
2507 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2508 -  -mat_aij_oneindex - Internally use indexing starting at 1
2509         rather than 0.  Note that when calling MatSetValues(),
2510         the user still MUST index entries starting at 0!
2511 
2512 
2513    Example usage:
2514 
2515    Consider the following 8x8 matrix with 34 non-zero values, that is
2516    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2517    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2518    as follows:
2519 
2520 .vb
2521             1  2  0  |  0  3  0  |  0  4
2522     Proc0   0  5  6  |  7  0  0  |  8  0
2523             9  0 10  | 11  0  0  | 12  0
2524     -------------------------------------
2525            13  0 14  | 15 16 17  |  0  0
2526     Proc1   0 18  0  | 19 20 21  |  0  0
2527             0  0  0  | 22 23  0  | 24  0
2528     -------------------------------------
2529     Proc2  25 26 27  |  0  0 28  | 29  0
2530            30  0  0  | 31 32 33  |  0 34
2531 .ve
2532 
2533    This can be represented as a collection of submatrices as:
2534 
2535 .vb
2536       A B C
2537       D E F
2538       G H I
2539 .ve
2540 
2541    Where the submatrices A,B,C are owned by proc0, D,E,F are
2542    owned by proc1, G,H,I are owned by proc2.
2543 
2544    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2545    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2546    The 'M','N' parameters are 8,8, and have the same values on all procs.
2547 
2548    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2549    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2550    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2551    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2552    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2553    matrix, and [DF] as another SeqAIJ matrix.
2554 
2555    When d_nz, o_nz parameters are specified, d_nz storage elements are
2556    allocated for every row of the local diagonal submatrix, and o_nz
2557    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2558    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
2559    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2560    In this case, the values of d_nz,o_nz are:
2561 .vb
2562      proc0 : d_nz = 2, o_nz = 2
2563      proc1 : d_nz = 3, o_nz = 2
2564      proc2 : d_nz = 1, o_nz = 4
2565 .ve
2566    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2567    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2568    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2569    34 values.
2570 
2571    When d_nnz, o_nnz parameters are specified, the storage is specified
2572    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2573    In the above case the values for d_nnz,o_nnz are:
2574 .vb
2575      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2576      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2577      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2578 .ve
2579    Here the space allocated is the sum of all the above values, i.e. 34, and
2580    hence the preallocation is perfect.
2581 
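   For proc0 in the example above, the corresponding creation call would look like (a sketch using
   the sizes from the example; comm and A are assumed to be declared by the caller):
.vb
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
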
2582    Level: intermediate
2583 
2584 .keywords: matrix, aij, compressed row, sparse, parallel
2585 
2586 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2587           MPIAIJ
2588 @*/
2589 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2590 {
2591   PetscErrorCode ierr;
2592   PetscMPIInt    size;
2593 
2594   PetscFunctionBegin;
2595   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2596   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2597   if (size > 1) {
2598     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2599     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2600   } else {
2601     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2602     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2603   }
2604   PetscFunctionReturn(0);
2605 }
2606 
2607 #undef __FUNCT__
2608 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
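/*
    Returns the local diagonal block (Ad), the local off-diagonal block (Ao), and the mapping
  from Ao's compressed (local) column numbers to global column numbers (colmap).
*/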
2609 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2610 {
2611   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2612 
2613   PetscFunctionBegin;
2614   *Ad     = a->A;
2615   *Ao     = a->B;
2616   *colmap = a->garray;
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 #undef __FUNCT__
2621 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2622 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2623 {
2624   PetscErrorCode ierr;
2625   PetscInt       i;
2626   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2627 
2628   PetscFunctionBegin;
2629   if (coloring->ctype == IS_COLORING_LOCAL) {
2630     ISColoringValue *allcolors,*colors;
2631     ISColoring      ocoloring;
2632 
2633     /* set coloring for diagonal portion */
2634     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2635 
2636     /* set coloring for off-diagonal portion */
2637     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2638     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2639     for (i=0; i<a->B->n; i++) {
2640       colors[i] = allcolors[a->garray[i]];
2641     }
2642     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2643     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2644     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2645     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2646   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2647     ISColoringValue *colors;
2648     PetscInt             *larray;
2649     ISColoring      ocoloring;
2650 
2651     /* set coloring for diagonal portion */
2652     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2653     for (i=0; i<a->A->n; i++) {
2654       larray[i] = i + a->cstart;
2655     }
2656     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2657     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2658     for (i=0; i<a->A->n; i++) {
2659       colors[i] = coloring->colors[larray[i]];
2660     }
2661     ierr = PetscFree(larray);CHKERRQ(ierr);
2662     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2663     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2664     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2665 
2666     /* set coloring for off-diagonal portion */
2667     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2668     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2669     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2670     for (i=0; i<a->B->n; i++) {
2671       colors[i] = coloring->colors[larray[i]];
2672     }
2673     ierr = PetscFree(larray);CHKERRQ(ierr);
2674     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2675     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2676     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2677   } else {
2678     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2679   }
2680 
2681   PetscFunctionReturn(0);
2682 }
2683 
2684 #if defined(PETSC_HAVE_ADIC)
2685 #undef __FUNCT__
2686 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2687 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2688 {
2689   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2690   PetscErrorCode ierr;
2691 
2692   PetscFunctionBegin;
2693   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2694   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2695   PetscFunctionReturn(0);
2696 }
2697 #endif
2698 
2699 #undef __FUNCT__
2700 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2701 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2702 {
2703   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2704   PetscErrorCode ierr;
2705 
2706   PetscFunctionBegin;
2707   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2708   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2709   PetscFunctionReturn(0);
2710 }
2711 
2712 #undef __FUNCT__
2713 #define __FUNCT__ "MatMerge"
2714 /*@C
2715       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2716                  matrices from each processor
2717 
2718     Collective on MPI_Comm
2719 
2720    Input Parameters:
2721 +    comm - the communicator the parallel matrix will live on
2722 .    inmat - the input sequential matrix on each processor
2723 .    n - number of local columns (or PETSC_DECIDE)
2724 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2725 
2726    Output Parameter:
2727 .    outmat - the parallel matrix generated
2728 
2729     Level: advanced
2730 
2731    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2732 
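   A minimal usage sketch (inmat is assumed to be an already assembled sequential matrix on each
   process; note that the implementation below destroys inmat after its values have been copied):
.vb
     Mat C;
     ierr = MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
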
2733 @*/
2734 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2735 {
2736   PetscErrorCode ierr;
2737   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2738   PetscInt       *indx;
2739   PetscScalar    *values;
2740   PetscMap       columnmap,rowmap;
2741 
2742   PetscFunctionBegin;
2743   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2744   /*
2745   PetscMPIInt       rank;
2746   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2747   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2748   */
2749   if (scall == MAT_INITIAL_MATRIX){
2750     /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
2751     if (n == PETSC_DECIDE){
2752       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2753       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2754       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2755       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2756       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2757     }
2758 
2759     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2760     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2761     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2762     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2763     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2764 
2765     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2766     for (i=0;i<m;i++) {
2767       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2768       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2769       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2770     }
2771     /* This routine will ONLY return MPIAIJ type matrix */
2772     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2773     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2774     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2775     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2776 
2777   } else if (scall == MAT_REUSE_MATRIX){
2778     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2779   } else {
2780     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2781   }
2782 
2783   for (i=0;i<m;i++) {
2784     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2785     I    = i + rstart;
2786     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2787     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2788   }
2789   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2790   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2791   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2792 
2793   PetscFunctionReturn(0);
2794 }
2795 
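/*
   Usage sketch for MatMerge() (illustrative only; mlocal, N, and maxnz are
   hypothetical values supplied by the caller):

     Mat localmat,globalmat;

     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,maxnz,PETSC_NULL,&localmat);CHKERRQ(ierr);
     ... fill localmat with MatSetValues() and assemble it ...
     ierr = MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);CHKERRQ(ierr);

   Note that MatMerge() destroys localmat, so it must not be referenced afterwards.
*/
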
2796 #undef __FUNCT__
2797 #define __FUNCT__ "MatFileSplit"
2798 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2799 {
2800   PetscErrorCode    ierr;
2801   PetscMPIInt       rank;
2802   PetscInt          m,N,i,rstart,nnz;
2803   size_t            len;
2804   const PetscInt    *indx;
2805   PetscViewer       out;
2806   char              *name;
2807   Mat               B;
2808   const PetscScalar *values;
2809 
2810   PetscFunctionBegin;
2811   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2812   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2813   /* Should this be the type of the diagonal block of A? */
2814   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2815   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2816   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2817   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2818   for (i=0;i<m;i++) {
2819     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2820     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2821     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2822   }
2823   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2824   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2825 
2826   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2827   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2828   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2829   sprintf(name,"%s.%d",outfile,rank);
2830   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2831   ierr = PetscFree(name);CHKERRQ(ierr);
2832   ierr = MatView(B,out);CHKERRQ(ierr);
2833   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2834   ierr = MatDestroy(B);CHKERRQ(ierr);
2835   PetscFunctionReturn(0);
2836 }
2837 
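/*
   Usage sketch for MatFileSplit() (illustrative only): each process writes its
   local rows of the parallel matrix A to its own binary file, named by appending
   the rank to the given base name, e.g. "mymat.0", "mymat.1", ...

     ierr = MatFileSplit(A,"mymat");CHKERRQ(ierr);
*/
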
2838 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2839 #undef __FUNCT__
2840 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2841 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2842 {
2843   PetscErrorCode       ierr;
2844   Mat_Merge_SeqsToMPI  *merge;
2845   PetscObjectContainer container;
2846 
2847   PetscFunctionBegin;
2848   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2849   if (container) {
2850     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2851     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2852     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2853     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2854     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2855     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2856     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2857     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2858     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2859     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2860     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2861     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2862 
2863     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2864     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2865     ierr = PetscFree(merge);CHKERRQ(ierr);
2866   }
2867 
2868   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2869   PetscFunctionReturn(0);
2870 }
2871 
2872 #include "src/mat/utils/freespace.h"
2873 #include "petscbt.h"
2874 #undef __FUNCT__
2875 #define __FUNCT__ "MatMerge_SeqsToMPI"
2876 /*@C
2877       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2878                  matrices from each processor
2879 
2880     Collective on MPI_Comm
2881 
2882    Input Parameters:
2883 +    comm - the communicator the parallel matrix will live on
2884 .    seqmat - the input sequential matrix (one per processor)
2885 .    m - number of local rows (or PETSC_DECIDE)
2886 .    n - number of local columns (or PETSC_DECIDE)
2887 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2888 
2889    Output Parameter:
2890 .    mpimat - the parallel matrix generated
2891 
2892     Level: advanced
2893 
2894    Notes:
2895      The dimensions of the sequential matrix in each processor MUST be the same.
2896      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI" and is
2897      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2898 @*/
2899 static PetscEvent logkey_seqstompinum = 0;
2900 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2901 {
2902   PetscErrorCode       ierr;
2903   MPI_Comm             comm=mpimat->comm;
2904   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2905   PetscMPIInt          size,rank,taga,*len_s;
2906   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2907   PetscInt             proc,m;
2908   PetscInt             **buf_ri,**buf_rj;
2909   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2910   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2911   MPI_Request          *s_waits,*r_waits;
2912   MPI_Status           *status;
2913   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2914   Mat_Merge_SeqsToMPI  *merge;
2915   PetscObjectContainer container;
2916 
2917   PetscFunctionBegin;
2918   if (!logkey_seqstompinum) {
2919     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
2920   }
2921   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2922 
2923   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2924   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2925 
2926   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2927   if (container) {
2928     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2929   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat does not have an attached Mat_Merge_SeqsToMPI container");
2930   bi     = merge->bi;
2931   bj     = merge->bj;
2932   buf_ri = merge->buf_ri;
2933   buf_rj = merge->buf_rj;
2934 
2935   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2936   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2937   len_s  = merge->len_s;
2938 
2939   /* send and recv matrix values */
2940   /*-----------------------------*/
2941   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2942   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2943 
2944   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2945   for (proc=0,k=0; proc<size; proc++){
2946     if (!len_s[proc]) continue;
2947     i = owners[proc];
2948     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2949     k++;
2950   }
2951 
2952   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2953   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2954   ierr = PetscFree(status);CHKERRQ(ierr);
2955 
2956   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2957   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2958 
2959   /* insert mat values of mpimat */
2960   /*----------------------------*/
2961   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2962   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2963   nextrow = buf_ri_k + merge->nrecv;
2964   nextai  = nextrow + merge->nrecv;
2965 
2966   for (k=0; k<merge->nrecv; k++){
2967     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2968     nrows = *(buf_ri_k[k]);
2969     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2970     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
2971   }
2972 
2973   /* set values of ba */
2974   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2975   for (i=0; i<m; i++) {
2976     arow = owners[rank] + i;
2977     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2978     bnzi = bi[i+1] - bi[i];
2979     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2980 
2981     /* add local non-zero vals of this proc's seqmat into ba */
2982     anzi = ai[arow+1] - ai[arow];
2983     aj   = a->j + ai[arow];
2984     aa   = a->a + ai[arow];
2985     nextaj = 0;
2986     for (j=0; nextaj<anzi; j++){
2987       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2988         ba_i[j] += aa[nextaj++];
2989       }
2990     }
2991 
2992     /* add received vals into ba */
2993     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2994       /* i-th row */
2995       if (i == *nextrow[k]) {
2996         anzi = *(nextai[k]+1) - *nextai[k];
2997         aj   = buf_rj[k] + *(nextai[k]);
2998         aa   = abuf_r[k] + *(nextai[k]);
2999         nextaj = 0;
3000         for (j=0; nextaj<anzi; j++){
3001           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3002             ba_i[j] += aa[nextaj++];
3003           }
3004         }
3005         nextrow[k]++; nextai[k]++;
3006       }
3007     }
3008     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3009   }
3010   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3011   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3012 
3013   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3014   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3015   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3016   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3017   PetscFunctionReturn(0);
3018 }
3019 static PetscEvent logkey_seqstompisym = 0;
3020 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3021 {
3022   PetscErrorCode       ierr;
3023   Mat                  B_mpi;
3024   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3025   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3026   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3027   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3028   PetscInt             len,proc,*dnz,*onz;
3029   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3030   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3031   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3032   MPI_Status           *status;
3033   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3034   PetscBT              lnkbt;
3035   Mat_Merge_SeqsToMPI  *merge;
3036   PetscObjectContainer container;
3037 
3038   PetscFunctionBegin;
3039   if (!logkey_seqstompisym) {
3040     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3041   }
3042   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3043 
3044   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3045   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3046 
3047   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3048   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3049 
3050   /* determine row ownership */
3051   /*---------------------------------------------------------*/
3052   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3053   if (m == PETSC_DECIDE) {
3054     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3055   } else {
3056     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3057   }
3058   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3059   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3060   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3061 
3062   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3063   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3064 
3065   /* determine the number of messages to send, their lengths */
3066   /*---------------------------------------------------------*/
3067   len_s  = merge->len_s;
3068 
3069   len = 0;  /* length of buf_si[] */
3070   merge->nsend = 0;
3071   for (proc=0; proc<size; proc++){
3072     len_si[proc] = 0;
3073     if (proc == rank){
3074       len_s[proc] = 0;
3075     } else {
3076       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3077       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3078     }
3079     if (len_s[proc]) {
3080       merge->nsend++;
3081       nrows = 0;
3082       for (i=owners[proc]; i<owners[proc+1]; i++){
3083         if (ai[i+1] > ai[i]) nrows++;
3084       }
3085       len_si[proc] = 2*(nrows+1);
3086       len += len_si[proc];
3087     }
3088   }
3089 
3090   /* determine the number and length of messages to receive for ij-structure */
3091   /*-------------------------------------------------------------------------*/
3092   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3093   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3094 
3095   /* post the Irecv of j-structure */
3096   /*-------------------------------*/
3097   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3098   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3099 
3100   /* post the Isend of j-structure */
3101   /*--------------------------------*/
3102   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3103   sj_waits = si_waits + merge->nsend;
3104 
3105   for (proc=0, k=0; proc<size; proc++){
3106     if (!len_s[proc]) continue;
3107     i = owners[proc];
3108     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3109     k++;
3110   }
3111 
3112   /* receives and sends of j-structure are complete */
3113   /*------------------------------------------------*/
3114   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3115   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3116 
3117   /* send and recv i-structure */
3118   /*---------------------------*/
3119   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3120   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3121 
3122   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3123   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3124   for (proc=0,k=0; proc<size; proc++){
3125     if (!len_s[proc]) continue;
3126     /* form outgoing message for i-structure:
3127          buf_si[0]:                 nrows to be sent
3128                [1:nrows]:           row index (global)
3129                [nrows+1:2*nrows+1]: i-structure index
3130     */
3131     /*-------------------------------------------*/
3132     nrows = len_si[proc]/2 - 1;
3133     buf_si_i    = buf_si + nrows+1;
3134     buf_si[0]   = nrows;
3135     buf_si_i[0] = 0;
3136     nrows = 0;
3137     for (i=owners[proc]; i<owners[proc+1]; i++){
3138       anzi = ai[i+1] - ai[i];
3139       if (anzi) {
3140         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3141         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3142         nrows++;
3143       }
3144     }
3145     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3146     k++;
3147     buf_si += len_si[proc];
3148   }
3149 
3150   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3151   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3152 
3153   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3154   for (i=0; i<merge->nrecv; i++){
3155     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3156   }
3157 
3158   ierr = PetscFree(len_si);CHKERRQ(ierr);
3159   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3160   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3161   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3162   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3163   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3164   ierr = PetscFree(status);CHKERRQ(ierr);
3165 
3166   /* compute a local seq matrix in each processor */
3167   /*----------------------------------------------*/
3168   /* allocate bi array and free space for accumulating nonzero column info */
3169   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3170   bi[0] = 0;
3171 
3172   /* create and initialize a linked list */
3173   nlnk = N+1;
3174   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3175 
3176   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3177   len = 0;
3178   len  = ai[owners[rank+1]] - ai[owners[rank]];
3179   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3180   current_space = free_space;
3181 
3182   /* determine symbolic info for each local row */
3183   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3184   nextrow = buf_ri_k + merge->nrecv;
3185   nextai  = nextrow + merge->nrecv;
3186   for (k=0; k<merge->nrecv; k++){
3187     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3188     nrows = *buf_ri_k[k];
3189     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3190     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of the k-th received i-structure */
3191   }
3192 
3193   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3194   len = 0;
3195   for (i=0;i<m;i++) {
3196     bnzi   = 0;
3197     /* add local non-zero cols of this proc's seqmat into lnk */
3198     arow   = owners[rank] + i;
3199     anzi   = ai[arow+1] - ai[arow];
3200     aj     = a->j + ai[arow];
3201     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3202     bnzi += nlnk;
3203     /* add received col data into lnk */
3204     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3205       if (i == *nextrow[k]) { /* i-th row */
3206         anzi = *(nextai[k]+1) - *nextai[k];
3207         aj   = buf_rj[k] + *nextai[k];
3208         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3209         bnzi += nlnk;
3210         nextrow[k]++; nextai[k]++;
3211       }
3212     }
3213     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3214 
3215     /* if free space is not available, make more free space */
3216     if (current_space->local_remaining<bnzi) {
3217       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3218       nspacedouble++;
3219     }
3220     /* copy data into free space, then initialize lnk */
3221     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3222     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3223 
3224     current_space->array           += bnzi;
3225     current_space->local_used      += bnzi;
3226     current_space->local_remaining -= bnzi;
3227 
3228     bi[i+1] = bi[i] + bnzi;
3229   }
3230 
3231   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3232 
3233   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3234   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3235   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3236 
3237   /* create symbolic parallel matrix B_mpi */
3238   /*---------------------------------------*/
3239   if (n==PETSC_DECIDE) {
3240     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3241   } else {
3242     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3243   }
3244   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3245   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3246   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3247 
3248   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3249   B_mpi->assembled     = PETSC_FALSE;
3250   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3251   merge->bi            = bi;
3252   merge->bj            = bj;
3253   merge->buf_ri        = buf_ri;
3254   merge->buf_rj        = buf_rj;
3255   merge->coi           = PETSC_NULL;
3256   merge->coj           = PETSC_NULL;
3257   merge->owners_co     = PETSC_NULL;
3258 
3259   /* attach the supporting struct to B_mpi for reuse */
3260   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3261   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3262   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3263   *mpimat = B_mpi;
3264   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3265   PetscFunctionReturn(0);
3266 }
3267 
3268 static PetscEvent logkey_seqstompi = 0;
3269 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3270 {
3271   PetscErrorCode   ierr;
3272 
3273   PetscFunctionBegin;
3274   if (!logkey_seqstompi) {
3275     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3276   }
3277   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3278   if (scall == MAT_INITIAL_MATRIX){
3279     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3280   }
3281   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3282   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3283   PetscFunctionReturn(0);
3284 }
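
/*
   Usage sketch for MatMerge_SeqsToMPI() (illustrative only): build the parallel
   matrix once with MAT_INITIAL_MATRIX, then refill it with MAT_REUSE_MATRIX when
   only the numerical values of seqmat have changed (same nonzero pattern):

     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ... update the values of seqmat ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/
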
3285 static PetscEvent logkey_getlocalmat = 0;
3286 #undef __FUNCT__
3287 #define __FUNCT__ "MatGetLocalMat"
3288 /*@C
3289      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of a parallel MPIAIJ matrix
3290 
3291     Not Collective
3292 
3293    Input Parameters:
3294 +    A - the matrix
3295 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3296 
3297    Output Parameter:
3298 .    A_loc - the local sequential matrix generated
3299 
3300     Level: developer
3301 
3302 @*/
3303 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3304 {
3305   PetscErrorCode  ierr;
3306   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3307   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3308   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3309   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3310   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3311   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3312 
3313   PetscFunctionBegin;
3314   if (!logkey_getlocalmat) {
3315     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3316   }
3317   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3318   if (scall == MAT_INITIAL_MATRIX){
3319     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3320     ci[0] = 0;
3321     for (i=0; i<am; i++){
3322       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3323     }
3324     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3325     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3326     k = 0;
3327     for (i=0; i<am; i++) {
3328       ncols_o = bi[i+1] - bi[i];
3329       ncols_d = ai[i+1] - ai[i];
3330       /* off-diagonal portion of A */
3331       for (jo=0; jo<ncols_o; jo++) {
3332         col = cmap[*bj];
3333         if (col >= cstart) break;
3334         cj[k]   = col; bj++;
3335         ca[k++] = *ba++;
3336       }
3337       /* diagonal portion of A */
3338       for (j=0; j<ncols_d; j++) {
3339         cj[k]   = cstart + *aj++;
3340         ca[k++] = *aa++;
3341       }
3342       /* off-diagonal portion of A */
3343       for (j=jo; j<ncols_o; j++) {
3344         cj[k]   = cmap[*bj++];
3345         ca[k++] = *ba++;
3346       }
3347     }
3348     /* put together the new matrix */
3349     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3350     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3351     /* Since these are PETSc arrays, change flags to free them as necessary. */
3352     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3353     mat->freedata = PETSC_TRUE;
3354     mat->nonew    = 0;
3355   } else if (scall == MAT_REUSE_MATRIX){
3356     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3357     ci = mat->i; cj = mat->j; ca = mat->a;
3358     for (i=0; i<am; i++) {
3359       /* off-diagonal portion of A */
3360       ncols_o = bi[i+1] - bi[i];
3361       for (jo=0; jo<ncols_o; jo++) {
3362         col = cmap[*bj];
3363         if (col >= cstart) break;
3364         *ca++ = *ba++; bj++;
3365       }
3366       /* diagonal portion of A */
3367       ncols_d = ai[i+1] - ai[i];
3368       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3369       /* off-diagonal portion of A */
3370       for (j=jo; j<ncols_o; j++) {
3371         *ca++ = *ba++; bj++;
3372       }
3373     }
3374   } else {
3375     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3376   }
3377 
3378   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3379   PetscFunctionReturn(0);
3380 }
3381 
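/*
   Usage sketch for MatGetLocalMat() (illustrative only):

     Mat A_loc;

     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... change the numerical values of A (same nonzero pattern) ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/
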
3382 static PetscEvent logkey_getlocalmatcondensed = 0;
3383 #undef __FUNCT__
3384 #define __FUNCT__ "MatGetLocalMatCondensed"
3385 /*@C
3386      MatGetLocalMatCondensed - Creates a SeqAIJ matrix containing all the local rows of a parallel matrix, restricted to its NON-ZERO columns
3387 
3388     Not Collective
3389 
3390    Input Parameters:
3391 +    A - the matrix
3392 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3393 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3394 
3395    Output Parameter:
3396 .    A_loc - the local sequential matrix generated
3397 
3398     Level: developer
3399 
3400 @*/
3401 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3402 {
3403   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3404   PetscErrorCode    ierr;
3405   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3406   IS                isrowa,iscola;
3407   Mat               *aloc;
3408 
3409   PetscFunctionBegin;
3410   if (!logkey_getlocalmatcondensed) {
3411     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3412   }
3413   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3414   if (!row){
3415     start = a->rstart; end = a->rend;
3416     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3417   } else {
3418     isrowa = *row;
3419   }
3420   if (!col){
3421     start = a->cstart;
3422     cmap  = a->garray;
3423     nzA   = a->A->n;
3424     nzB   = a->B->n;
3425     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3426     ncols = 0;
3427     for (i=0; i<nzB; i++) {
3428       if (cmap[i] < start) idx[ncols++] = cmap[i];
3429       else break;
3430     }
3431     imark = i;
3432     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3433     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3434     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3435     ierr = PetscFree(idx);CHKERRQ(ierr);
3436   } else {
3437     iscola = *col;
3438   }
3439   if (scall != MAT_INITIAL_MATRIX){
3440     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3441     aloc[0] = *A_loc;
3442   }
3443   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3444   *A_loc = aloc[0];
3445   ierr = PetscFree(aloc);CHKERRQ(ierr);
3446   if (!row){
3447     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3448   }
3449   if (!col){
3450     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3451   }
3452   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3453   PetscFunctionReturn(0);
3454 }
3455 
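/*
   Usage sketch for MatGetLocalMatCondensed() (illustrative only): passing
   PETSC_NULL for row and col lets the routine build the index sets itself:

     Mat A_loc;

     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/
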
3456 static PetscEvent logkey_GetBrowsOfAcols = 0;
3457 #undef __FUNCT__
3458 #define __FUNCT__ "MatGetBrowsOfAcols"
3459 /*@C
3460     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
3461 
3462     Collective on Mat
3463 
3464    Input Parameters:
3465 +    A,B - the matrices in mpiaij format
3466 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3467 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3468 
3469    Output Parameter:
3470 +    rowb, colb - index sets of rows and columns of B to extract
3471 .    brstart - row index of B_seq at which the rows taken from B's own local rows begin
3472 -    B_seq - the sequential matrix generated
3473 
3474     Level: developer
3475 
3476 @*/
3477 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3478 {
3479   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3480   PetscErrorCode    ierr;
3481   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3482   IS                isrowb,iscolb;
3483   Mat               *bseq;
3484 
3485   PetscFunctionBegin;
3486   if (a->cstart != b->rstart || a->cend != b->rend){
3487     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3488   }
3489   if (!logkey_GetBrowsOfAcols) {
3490     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3491   }
3492   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3493 
3494   if (scall == MAT_INITIAL_MATRIX){
3495     start = a->cstart;
3496     cmap  = a->garray;
3497     nzA   = a->A->n;
3498     nzB   = a->B->n;
3499     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3500     ncols = 0;
3501     for (i=0; i<nzB; i++) {  /* row < local row index */
3502       if (cmap[i] < start) idx[ncols++] = cmap[i];
3503       else break;
3504     }
3505     imark = i;
3506     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3507     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3508     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3509     ierr = PetscFree(idx);CHKERRQ(ierr);
3510     *brstart = imark;
3511     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3512   } else {
3513     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3514     isrowb = *rowb; iscolb = *colb;
3515     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3516     bseq[0] = *B_seq;
3517   }
3518   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3519   *B_seq = bseq[0];
3520   ierr = PetscFree(bseq);CHKERRQ(ierr);
3521   if (!rowb){
3522     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3523   } else {
3524     *rowb = isrowb;
3525   }
3526   if (!colb){
3527     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3528   } else {
3529     *colb = iscolb;
3530   }
3531   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3532   PetscFunctionReturn(0);
3533 }
3534 
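/*
   Usage sketch for MatGetBrowsOfAcols() (illustrative only): the index sets
   created by the MAT_INITIAL_MATRIX call are kept and passed back in for reuse:

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... change the numerical values of B (same nonzero pattern) ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/
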
3535 static PetscEvent logkey_GetBrowsOfAocols = 0;
3536 #undef __FUNCT__
3537 #define __FUNCT__ "MatGetBrowsOfAoCols"
3538 /*@C
3539     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero
3540     columns of the OFF-DIAGONAL portion of the local part of A
3541 
3542     Collective on Mat
3543 
3544    Input Parameters:
3545 +    A,B - the matrices in mpiaij format
3546 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3547 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3548 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3549 
3550    Output Parameter:
3551 .    B_oth - the sequential matrix generated
3552 
3553     Level: developer
3554 
3555 @*/
3556 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3557 {
3558   VecScatter_MPI_General *gen_to,*gen_from;
3559   PetscErrorCode         ierr;
3560   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3561   Mat_SeqAIJ             *b_oth;
3562   VecScatter             ctx=a->Mvctx;
3563   MPI_Comm               comm=ctx->comm;
3564   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3565   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3566   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3567   PetscInt               i,k,l,nrecvs,nsends,nrows,*rrow,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3568   MPI_Request            *rwaits,*swaits;
3569   MPI_Status             *sstatus,rstatus;
3570   PetscInt               *cols;
3571   PetscScalar            *vals;
3572   PetscMPIInt            j;
3573 
3574   PetscFunctionBegin;
3575   if (a->cstart != b->rstart || a->cend != b->rend){
3576     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3577   }
3578   if (!logkey_GetBrowsOfAocols) {
3579     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3580   }
3581   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3582   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3583 
3584   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3585   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3586   rvalues  = gen_from->values; /* used to hold the lengths of the rows being received */
3587   svalues  = gen_to->values;   /* used to hold the lengths of the rows being sent */
3588   nrecvs   = gen_from->n;
3589   nsends   = gen_to->n;
3590   rwaits   = gen_from->requests;
3591   swaits   = gen_to->requests;
3592   rrow     = gen_from->indices; /* local row index to be received */
3593   srow     = gen_to->indices;   /* local row index to be sent */
3594   rstarts  = gen_from->starts;
3595   sstarts  = gen_to->starts;
3596   rprocs   = gen_from->procs;
3597   sprocs   = gen_to->procs;
3598   sstatus  = gen_to->sstatus;
3599 
3600   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3601   if (scall == MAT_INITIAL_MATRIX){
3602     /* i-array */
3603     /*---------*/
3604     /*  post receives */
3605     for (i=0; i<nrecvs; i++){
3606       rowlen = (PetscInt*)rvalues + rstarts[i];
3607       nrows = rstarts[i+1]-rstarts[i];
3608       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3609     }
3610 
3611     /* pack the outgoing message */
3612     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3613     rstartsj = sstartsj + nsends +1;
3614     sstartsj[0] = 0;  rstartsj[0] = 0;
3615     len = 0; /* total length of j or a array to be sent */
3616     k = 0;
3617     for (i=0; i<nsends; i++){
3618       rowlen = (PetscInt*)svalues + sstarts[i];
3619       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3620       for (j=0; j<nrows; j++) {
3621         row = srow[k] + b->rowners[rank]; /* global row idx */
3622         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3623         len += rowlen[j];
3624         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3625         k++;
3626       }
3627       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3628       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3629     }
3630     /* recvs and sends of i-array are completed */
3631     i = nrecvs;
3632     while (i--) {
3633       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3634     }
3635     if (nsends) {
3636       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3637     }
3638     /* allocate buffers for sending j and a arrays */
3639     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3640     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3641 
3642     /* create i-array of B_oth */
3643     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3644     b_othi[0] = 0;
3645     len = 0; /* total length of j or a array to be received */
3646     k = 0;
3647     for (i=0; i<nrecvs; i++){
3648       rowlen = (PetscInt*)rvalues + rstarts[i];
3649       nrows = rstarts[i+1]-rstarts[i];
3650       for (j=0; j<nrows; j++) {
3651         b_othi[k+1] = b_othi[k] + rowlen[j];
3652         len += rowlen[j]; k++;
3653       }
3654       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3655     }
3656 
3657     /* allocate space for the j and a arrays of B_oth */
3658     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3659     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3660 
3661     /* j-array */
3662     /*---------*/
3663     /*  post receives of j-array */
3664     for (i=0; i<nrecvs; i++){
3665       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3666       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3667     }
3668     k = 0;
3669     for (i=0; i<nsends; i++){
3670       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3671       bufJ = bufj+sstartsj[i];
3672       for (j=0; j<nrows; j++) {
3673         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3674         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3675         for (l=0; l<ncols; l++){
3676           *bufJ++ = cols[l];
3677         }
3678         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3679       }
3680       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3681     }
3682 
3683     /* recvs and sends of j-array are completed */
3684     i = nrecvs;
3685     while (i--) {
3686       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3687     }
3688     if (nsends) {
3689       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3690     }
3691   } else if (scall == MAT_REUSE_MATRIX){
3692     sstartsj = *startsj;
3693     rstartsj = sstartsj + nsends +1;
3694     bufa     = *bufa_ptr;
3695     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3696     b_otha   = b_oth->a;
3697   } else {
3698     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3699   }
3700 
3701   /* a-array */
3702   /*---------*/
3703   /*  post receives of a-array */
3704   for (i=0; i<nrecvs; i++){
3705     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3706     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3707   }
3708   k = 0;
3709   for (i=0; i<nsends; i++){
3710     nrows = sstarts[i+1]-sstarts[i];
3711     bufA = bufa+sstartsj[i];
3712     for (j=0; j<nrows; j++) {
3713       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3714       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3715       for (l=0; l<ncols; l++){
3716         *bufA++ = vals[l];
3717       }
3718       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3719 
3720     }
3721     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3722   }
3723   /* recvs and sends of a-array are completed */
3724   i = nrecvs;
3725   while (i--) {
3726     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3727   }
3728   if (nsends) {
3729     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3730   }
3731 
3732   if (scall == MAT_INITIAL_MATRIX){
3733     /* put together the new matrix */
3734     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3735 
3736     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3737     /* Since these are PETSc arrays, change flags to free them as necessary. */
3738     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3739     b_oth->freedata = PETSC_TRUE;
3740     b_oth->nonew    = 0;
3741 
3742     ierr = PetscFree(bufj);CHKERRQ(ierr);
3743     if (!startsj || !bufa_ptr){
3744       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3745       ierr = PetscFree(bufa);CHKERRQ(ierr);
3746     } else {
3747       *startsj  = sstartsj;
3748       *bufa_ptr = bufa;
3749     }
3750   }
3751   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3752 
3753   PetscFunctionReturn(0);
3754 }
3755 
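/*
   Usage sketch for MatGetBrowsOfAoCols() (illustrative only): startsj and bufa
   returned by the MAT_INITIAL_MATRIX call are saved and passed back for reuse:

     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         B_oth;

     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ... change the numerical values of B (same nonzero pattern) ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
*/
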
3756 /*MC
3757    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3758 
3759    Options Database Keys:
3760 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3761 
3762   Level: beginner
3763 
3764 .seealso: MatCreateMPIAIJ
3765 M*/
3766 
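/*
   Usage sketch for creating a MATMPIAIJ matrix (illustrative only; M, N, and the
   preallocation counts are hypothetical):

     Mat A;

     ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Alternatively, MatSetFromOptions() together with -mat_type mpiaij selects this
   type from the options database.
*/
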
3767 EXTERN_C_BEGIN
3768 #undef __FUNCT__
3769 #define __FUNCT__ "MatCreate_MPIAIJ"
3770 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3771 {
3772   Mat_MPIAIJ     *b;
3773   PetscErrorCode ierr;
3774   PetscInt       i;
3775   PetscMPIInt    size;
3776 
3777   PetscFunctionBegin;
3778   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3779 
3780   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3781   B->data         = (void*)b;
3782   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3783   B->factor       = 0;
3784   B->bs           = 1;
3785   B->assembled    = PETSC_FALSE;
3786   B->mapping      = 0;
3787 
3788   B->insertmode      = NOT_SET_VALUES;
3789   b->size            = size;
3790   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3791 
3792   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3793   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3794 
3795   /* the information in the maps duplicates the information computed below, eventually
3796      we should remove the duplicate information that is not contained in the maps */
3797   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3798   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3799 
3800   /* build local table of row and column ownerships */
3801   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3802   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3803   b->cowners = b->rowners + b->size + 2;
3804   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3805   b->rowners[0] = 0;
3806   for (i=2; i<=b->size; i++) {
3807     b->rowners[i] += b->rowners[i-1];
3808   }
3809   b->rstart = b->rowners[b->rank];
3810   b->rend   = b->rowners[b->rank+1];
3811   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3812   b->cowners[0] = 0;
3813   for (i=2; i<=b->size; i++) {
3814     b->cowners[i] += b->cowners[i-1];
3815   }
3816   b->cstart = b->cowners[b->rank];
3817   b->cend   = b->cowners[b->rank+1];
3818 
3819   /* build cache for off array entries formed */
3820   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3821   b->donotstash  = PETSC_FALSE;
3822   b->colmap      = 0;
3823   b->garray      = 0;
3824   b->roworiented = PETSC_TRUE;
3825 
3826   /* stuff used for matrix vector multiply */
3827   b->lvec      = PETSC_NULL;
3828   b->Mvctx     = PETSC_NULL;
3829 
3830   /* stuff for MatGetRow() */
3831   b->rowindices   = 0;
3832   b->rowvalues    = 0;
3833   b->getrowactive = PETSC_FALSE;
3834 
3835   /* Explicitly create 2 MATSEQAIJ matrices. */
3836   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3837   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3838   PetscLogObjectParent(B,b->A);
3839   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3840   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3841   PetscLogObjectParent(B,b->B);
3842 
3843   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3844                                      "MatStoreValues_MPIAIJ",
3845                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3846   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3847                                      "MatRetrieveValues_MPIAIJ",
3848                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3849   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3850 				     "MatGetDiagonalBlock_MPIAIJ",
3851                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3852   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3853 				     "MatIsTranspose_MPIAIJ",
3854 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3855   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3856 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3857 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3858   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3859 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3860 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3861   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3862 				     "MatDiagonalScaleLocal_MPIAIJ",
3863 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3864   PetscFunctionReturn(0);
3865 }
3866 EXTERN_C_END
3867