xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision dba47a550923b04c7c4ebbb735eb62a1b3e4e9ae)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table lookup cost; without it, it is not scalable (each processor
11 stores an order-N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
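/*
   Added illustration (kept out of the build with #if 0; the helper name is
   hypothetical, not part of PETSc): how the colmap built above is typically
   consulted to translate a global column number gcol into its local index in
   the off-diagonal block B.  A result of -1 means gcol is not present; both
   branches store the index shifted by one so that zero can mean "absent".
*/
#if 0
static PetscErrorCode ColmapLookup_Sketch(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                     /* missing keys come back as 0, i.e. -1 after the shift */
#else
  *lcol = aij->colmap[gcol] - 1; /* same one-based shift in the plain array version */
#endif
  PetscFunctionReturn(0);
}
#endif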
35 
36 #define CHUNKSIZE   15
37 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
38 { \
39     if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
40     lastcol1 = col;\
41     while (high1-low1 > 5) { \
42       t = (low1+high1)/2; \
43       if (rp1[t] > col) high1 = t; \
44       else             low1  = t; \
45     } \
46       for (_i=low1; _i<high1; _i++) { \
47         if (rp1[_i] > col) break; \
48         if (rp1[_i] == col) { \
49           if (addv == ADD_VALUES) ap1[_i] += value;   \
50           else                    ap1[_i] = value; \
51           goto a_noinsert; \
52         } \
53       }  \
54       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
55       if (nonew == 1) goto a_noinsert; \
56       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
57       if (nrow1 >= rmax1) { \
58         /* there is no extra room in row, therefore enlarge */ \
59         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
60         PetscScalar *new_a; \
61  \
62         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
63  \
64         /* malloc new storage space */ \
65         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
66         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
67         new_j   = (PetscInt*)(new_a + new_nz); \
68         new_i   = new_j + new_nz; \
69  \
70         /* copy over old data into new slots */ \
71         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
72         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
73         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow1)*sizeof(PetscInt));CHKERRQ(ierr); \
74         len = (new_nz - CHUNKSIZE - ai[row] - nrow1); \
75         ierr = PetscMemcpy(new_j+ai[row]+nrow1+CHUNKSIZE,aj+ai[row]+nrow1,len*sizeof(PetscInt));CHKERRQ(ierr); \
76         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow1)*sizeof(PetscScalar));CHKERRQ(ierr); \
77         ierr = PetscMemcpy(new_a+ai[row]+nrow1+CHUNKSIZE,aa+ai[row]+nrow1,len*sizeof(PetscScalar));CHKERRQ(ierr);  \
78         /* free up old matrix storage */ \
79  \
80         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
81         if (!a->singlemalloc) { \
82            ierr = PetscFree(a->i);CHKERRQ(ierr); \
83            ierr = PetscFree(a->j);CHKERRQ(ierr); \
84         } \
85         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
86         a->singlemalloc = PETSC_TRUE; \
87  \
88         rp1   = aj + ai[row]; ap1 = aa + ai[row]; \
89         rmax1 = aimax[row] = aimax[row] + CHUNKSIZE; \
90         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
91         a->maxnz += CHUNKSIZE; \
92         a->reallocs++; \
93       } \
94       N = nrow1++ - 1; a->nz++; \
95       /* shift up all the later entries in this row */ \
96       for (ii=N; ii>=_i; ii--) { \
97         rp1[ii+1] = rp1[ii]; \
98         ap1[ii+1] = ap1[ii]; \
99       } \
100       rp1[_i] = col;  \
101       ap1[_i] = value;  \
102       a_noinsert: ; \
103       ailen[row] = nrow1; \
104 }
105 
106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
107 { \
108     if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
109     lastcol2 = col;\
110     while (high2-low2 > 5) { \
111       t = (low2+high2)/2; \
112       if (rp2[t] > col) high2 = t; \
113       else             low2  = t; \
114     } \
115        for (_i=low2; _i<high2; _i++) { \
116         if (rp2[_i] > col) break; \
117         if (rp2[_i] == col) { \
118           if (addv == ADD_VALUES) ap2[_i] += value;   \
119           else                    ap2[_i] = value; \
120           goto b_noinsert; \
121         } \
122       }  \
123       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
124       if (nonew == 1) goto b_noinsert; \
125       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
126       if (nrow2 >= rmax2) { \
127         /* there is no extra room in row, therefore enlarge */ \
128         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
129         PetscScalar *new_a; \
130  \
131         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
132  \
133         /* malloc new storage space */ \
134         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
135         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
136         new_j   = (PetscInt*)(new_a + new_nz); \
137         new_i   = new_j + new_nz; \
138  \
139         /* copy over old data into new slots */ \
140         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
141         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
142         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow2)*sizeof(PetscInt));CHKERRQ(ierr); \
143         len = (new_nz - CHUNKSIZE - bi[row] - nrow2); \
144         ierr = PetscMemcpy(new_j+bi[row]+nrow2+CHUNKSIZE,bj+bi[row]+nrow2,len*sizeof(PetscInt));CHKERRQ(ierr); \
145         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow2)*sizeof(PetscScalar));CHKERRQ(ierr); \
146         ierr = PetscMemcpy(new_a+bi[row]+nrow2+CHUNKSIZE,ba+bi[row]+nrow2,len*sizeof(PetscScalar));CHKERRQ(ierr);  \
147         /* free up old matrix storage */ \
148  \
149         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
150         if (!b->singlemalloc) { \
151           ierr = PetscFree(b->i);CHKERRQ(ierr); \
152           ierr = PetscFree(b->j);CHKERRQ(ierr); \
153         } \
154         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
155         b->singlemalloc = PETSC_TRUE; \
156  \
157         rp2   = bj + bi[row]; ap2 = ba + bi[row]; \
158         rmax2 = bimax[row] = bimax[row] + CHUNKSIZE; \
159         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
160         b->maxnz += CHUNKSIZE; \
161         b->reallocs++; \
162       } \
163       N = nrow2++ - 1; b->nz++; \
164       /* shift up all the later entries in this row */ \
165       for (ii=N; ii>=_i; ii--) { \
166         rp2[ii+1] = rp2[ii]; \
167         ap2[ii+1] = ap2[ii]; \
168       } \
169       rp2[_i] = col;  \
170       ap2[_i] = value;  \
171       b_noinsert: ; \
172       bilen[row] = nrow2; \
173 }
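/*
   Added illustration (guarded out of the build; the function name is
   hypothetical): the reallocation performed by the two macros above packs the
   values, column indices and row offsets back to back in one PetscMalloc()'d
   block, which is why singlemalloc is set afterwards and freeing a->a (or
   b->a) alone later releases all three arrays.
*/
#if 0
static PetscErrorCode SingleMallocLayout_Sketch(PetscInt new_nz,PetscInt am)
{
  PetscErrorCode ierr;
  PetscInt       len,*new_i,*new_j;
  PetscScalar    *new_a;

  PetscFunctionBegin;
  len   = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt);
  ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); /* [ new_nz scalars | new_nz ints | am+1 ints ] */
  new_j = (PetscInt*)(new_a + new_nz);           /* column indices start right after the values */
  new_i = new_j + new_nz;                        /* row offsets start right after the indices   */
  new_i[am] = 0;
  ierr  = PetscFree(new_a);CHKERRQ(ierr);        /* one free releases values, indices, offsets  */
  PetscFunctionReturn(0);
}
#endif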
174 
175 #undef __FUNCT__
176 #define __FUNCT__ "MatSetValues_MPIAIJ"
177 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
178 {
179   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
180   PetscScalar    value;
181   PetscErrorCode ierr;
182   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
183   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
184   PetscTruth     roworiented = aij->roworiented;
185 
186   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros above */
187   Mat            A = aij->A;
188   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
189   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
190   PetscScalar    *aa = a->a;
191   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
192   Mat            B = aij->B;
193   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
194   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
195   PetscScalar    *ba = b->a;
196 
197   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
198   PetscInt       nonew = a->nonew;
199   PetscScalar    *ap1,*ap2;
200 
201   PetscFunctionBegin;
202   for (i=0; i<m; i++) {
203     if (im[i] < 0) continue;
204 #if defined(PETSC_USE_DEBUG)
205     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
206 #endif
207     if (im[i] >= rstart && im[i] < rend) {
208       row      = im[i] - rstart;
209       lastcol1 = -1;
210       rp1      = aj + ai[row];
211       ap1      = aa + ai[row];
212       rmax1    = aimax[row];
213       nrow1    = ailen[row];
214       low1     = 0;
215       high1    = nrow1;
216       lastcol2 = -1;
217       rp2      = bj + bi[row];
218       ap2      = ba + bi[row];
219       rmax2    = bimax[row];
220       nrow2    = bilen[row];
221       low2     = 0;
222       high2    = nrow2;
223 
224       for (j=0; j<n; j++) {
225         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
230         } else if (in[j] < 0) continue;
231 #if defined(PETSC_USE_DEBUG)
232         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!aij->colmap) {
237               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
238             }
239 #if defined (PETSC_USE_CTABLE)
240             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
241 	    col--;
242 #else
243             col = aij->colmap[in[j]] - 1;
244 #endif
245             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
246               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
247               col =  in[j];
248               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
249               B = aij->B;
250               b = (Mat_SeqAIJ*)B->data;
251               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
252               ba = b->a;
253             }
254           } else col = in[j];
255           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
256         }
257       }
258     } else {
259       if (!aij->donotstash) {
260         if (roworiented) {
261           if (ignorezeroentries && v[i*n] == 0.0) continue;
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
263         } else {
264           if (ignorezeroentries && v[i] == 0.0) continue;
265           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
266         }
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatGetValues_MPIAIJ"
275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
276 {
277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
278   PetscErrorCode ierr;
279   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
280   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
281 
282   PetscFunctionBegin;
283   for (i=0; i<m; i++) {
284     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
285     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
286     if (idxm[i] >= rstart && idxm[i] < rend) {
287       row = idxm[i] - rstart;
288       for (j=0; j<n; j++) {
289         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
290         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
291         if (idxn[j] >= cstart && idxn[j] < cend){
292           col = idxn[j] - cstart;
293           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
294         } else {
295           if (!aij->colmap) {
296             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
297           }
298 #if defined (PETSC_USE_CTABLE)
299           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
300           col --;
301 #else
302           col = aij->colmap[idxn[j]] - 1;
303 #endif
304           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
305           else {
306             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
307           }
308         }
309       }
310     } else {
311       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
312     }
313   }
314   PetscFunctionReturn(0);
315 }
316 
317 #undef __FUNCT__
318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
320 {
321   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
322   PetscErrorCode ierr;
323   PetscInt       nstash,reallocs;
324   InsertMode     addv;
325 
326   PetscFunctionBegin;
327   if (aij->donotstash) {
328     PetscFunctionReturn(0);
329   }
330 
331   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES; the bitwise OR below carries both bits if the modes were mixed */
332   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
333   if (addv == (ADD_VALUES|INSERT_VALUES)) {
334     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
335   }
336   mat->insertmode = addv; /* in case this processor had no cache */
337 
338   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
339   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
340   ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNCT__
345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
347 {
348   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
349   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
350   PetscErrorCode ierr;
351   PetscMPIInt    n;
352   PetscInt       i,j,rstart,ncols,flg;
353   PetscInt       *row,*col,other_disassembled;
354   PetscScalar    *val;
355   InsertMode     addv = mat->insertmode;
356 
357   PetscFunctionBegin;
358   if (!aij->donotstash) {
359     while (1) {
360       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
361       if (!flg) break;
362 
363       for (i=0; i<n;) {
364         /* Now identify the consecutive vals belonging to the same row */
365         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
366         if (j < n) ncols = j-i;
367         else       ncols = n-i;
368         /* Now assemble all these values with a single function call */
369         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
370         i = j;
371       }
372     }
373     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
374   }
375   a->compressedrow.use     = PETSC_FALSE;
376   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
377   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
378 
379   /* determine if any processor has disassembled; if so we must
380      also disassemble ourselves, in order that we may reassemble. */
381   /*
382      if the nonzero structure of submatrix B cannot change then we know that
383      no processor disassembled, thus we can skip this stuff
384   */
385   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
386     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
387     if (mat->was_assembled && !other_disassembled) {
388       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
389       /* reaccess the b because aij->B was changed */
390       b    = (Mat_SeqAIJ *)aij->B->data;
391     }
392   }
393 
394   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
395     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
396   }
397   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
398   b->compressedrow.use = PETSC_TRUE;
399   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
400   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
401 
402   if (aij->rowvalues) {
403     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
404     aij->rowvalues = 0;
405   }
406 
407   /* used by MatAXPY() */
408   a->xtoy = 0; b->xtoy = 0;
409   a->XtoY = 0; b->XtoY = 0;
410 
411   PetscFunctionReturn(0);
412 }
413 
414 #undef __FUNCT__
415 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
416 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
417 {
418   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
419   PetscErrorCode ierr;
420 
421   PetscFunctionBegin;
422   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
423   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatZeroRows_MPIAIJ"
429 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
430 {
431   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
432   PetscErrorCode ierr;
433   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
434   PetscInt       i,N,*rows,*owners = l->rowners;
435   PetscInt       *nprocs,j,idx,nsends,row;
436   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
437   PetscInt       *rvalues,count,base,slen,*source;
438   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
439   MPI_Comm       comm = A->comm;
440   MPI_Request    *send_waits,*recv_waits;
441   MPI_Status     recv_status,*send_status;
442   IS             istmp;
443 #if defined(PETSC_DEBUG)
444   PetscTruth     found = PETSC_FALSE;
445 #endif
446 
447   PetscFunctionBegin;
448   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
449   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
450 
451   /*  first count number of contributors to each processor */
452   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
453   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
454   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
455   j = 0;
456   for (i=0; i<N; i++) {
457     if (lastidx > (idx = rows[i])) j = 0;
458     lastidx = idx;
459     for (; j<size; j++) {
460       if (idx >= owners[j] && idx < owners[j+1]) {
461         nprocs[2*j]++;
462         nprocs[2*j+1] = 1;
463         owner[i] = j;
464 #if defined(PETSC_DEBUG)
465         found = PETSC_TRUE;
466 #endif
467         break;
468       }
469     }
470 #if defined(PETSC_DEBUG)
471     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
472     found = PETSC_FALSE;
473 #endif
474   }
475   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
476 
477   /* inform other processors of number of messages and max length*/
478   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
479 
480   /* post receives:   */
481   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
482   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
483   for (i=0; i<nrecvs; i++) {
484     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
485   }
486 
487   /* do sends:
488       1) starts[i] gives the starting index in svalues for stuff going to
489          the ith processor
490   */
491   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
492   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
493   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
494   starts[0] = 0;
495   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
496   for (i=0; i<N; i++) {
497     svalues[starts[owner[i]]++] = rows[i];
498   }
499   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
500 
501   starts[0] = 0;
502   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
503   count = 0;
504   for (i=0; i<size; i++) {
505     if (nprocs[2*i+1]) {
506       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
507     }
508   }
509   ierr = PetscFree(starts);CHKERRQ(ierr);
510 
511   base = owners[rank];
512 
513   /*  wait on receives */
514   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
515   source = lens + nrecvs;
516   count  = nrecvs; slen = 0;
517   while (count) {
518     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
519     /* unpack receives into our local space */
520     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
521     source[imdex]  = recv_status.MPI_SOURCE;
522     lens[imdex]    = n;
523     slen          += n;
524     count--;
525   }
526   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
527 
528   /* move the data into the send scatter */
529   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
530   count = 0;
531   for (i=0; i<nrecvs; i++) {
532     values = rvalues + i*nmax;
533     for (j=0; j<lens[i]; j++) {
534       lrows[count++] = values[j] - base;
535     }
536   }
537   ierr = PetscFree(rvalues);CHKERRQ(ierr);
538   ierr = PetscFree(lens);CHKERRQ(ierr);
539   ierr = PetscFree(owner);CHKERRQ(ierr);
540   ierr = PetscFree(nprocs);CHKERRQ(ierr);
541 
542   /* actually zap the local rows */
543   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
544   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
545 
546   /*
547         Zero the required rows. If the "diagonal block" of the matrix
548      is square and the user wishes to set the diagonal we use separate
549      code so that MatSetValues() is not called for each diagonal entry, which
550      would allocate new memory and perform many mallocs, slowing things down.
551 
552        Contributed by: Mathew Knepley
553   */
554   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
555   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
556   if (diag && (l->A->M == l->A->N)) {
557     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
558   } else if (diag) {
559     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
560     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
561       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
562 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
563     }
564     for (i = 0; i < slen; i++) {
565       row  = lrows[i] + rstart;
566       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
567     }
568     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
569     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
570   } else {
571     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
572   }
573   ierr = ISDestroy(istmp);CHKERRQ(ierr);
574   ierr = PetscFree(lrows);CHKERRQ(ierr);
575 
576   /* wait on sends */
577   if (nsends) {
578     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
579     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
580     ierr = PetscFree(send_status);CHKERRQ(ierr);
581   }
582   ierr = PetscFree(send_waits);CHKERRQ(ierr);
583   ierr = PetscFree(svalues);CHKERRQ(ierr);
584 
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatMult_MPIAIJ"
590 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
591 {
592   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
593   PetscErrorCode ierr;
594   PetscInt       nt;
595 
596   PetscFunctionBegin;
597   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
598   if (nt != A->n) {
599     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
600   }
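  /* begin gathering the ghost (off-process) entries of xx into a->lvec; this
     communication is overlapped with the local diagonal-block product below */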
601   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
603   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
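  /* the ghost values have arrived, so add the off-diagonal contribution: yy += B*lvec */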
604   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 #undef __FUNCT__
609 #define __FUNCT__ "MatMultAdd_MPIAIJ"
610 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
611 {
612   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
613   PetscErrorCode ierr;
614 
615   PetscFunctionBegin;
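  /* same overlap pattern as MatMult_MPIAIJ: gather ghost values of xx while
     computing zz = A*xx + yy locally, then add the off-diagonal part B*lvec */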
616   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
617   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
618   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
619   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
625 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
626 {
627   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
628   PetscErrorCode ierr;
629   PetscTruth     merged;
630 
631   PetscFunctionBegin;
632   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
633   /* do nondiagonal part */
634   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
635   if (!merged) {
636     /* send it on its way */
637     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
638     /* do local part */
639     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
640     /* receive remote parts: note this assumes the values are not actually */
641     /* added into yy until the VecScatterEnd() below */
642     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
643   } else {
644     /* do local part */
645     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
646     /* send it on its way */
647     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
648     /* values actually were received in the Begin() but we need to call this nop */
649     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
650   }
651   PetscFunctionReturn(0);
652 }
653 
654 EXTERN_C_BEGIN
655 #undef __FUNCT__
656 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
657 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
658 {
659   MPI_Comm       comm;
660   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
661   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
662   IS             Me,Notme;
663   PetscErrorCode ierr;
664   PetscInt       M,N,first,last,*notme,i;
665   PetscMPIInt    size;
666 
667   PetscFunctionBegin;
668 
669   /* Easy test: symmetric diagonal block */
670   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
671   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
672   if (!*f) PetscFunctionReturn(0);
673   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
674   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
675   if (size == 1) PetscFunctionReturn(0);
676 
677   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
678   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
679   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
680   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
681   for (i=0; i<first; i++) notme[i] = i;
682   for (i=last; i<M; i++) notme[i-last+first] = i;
683   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
684   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
685   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
686   Aoff = Aoffs[0];
687   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
688   Boff = Boffs[0];
689   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
690   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
691   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
692   ierr = ISDestroy(Me);CHKERRQ(ierr);
693   ierr = ISDestroy(Notme);CHKERRQ(ierr);
694 
695   PetscFunctionReturn(0);
696 }
697 EXTERN_C_END
698 
699 #undef __FUNCT__
700 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
701 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
702 {
703   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
704   PetscErrorCode ierr;
705 
706   PetscFunctionBegin;
707   /* do nondiagonal part */
708   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
709   /* send it on its way */
710   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
711   /* do local part */
712   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
713   /* receive remote parts */
714   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
715   PetscFunctionReturn(0);
716 }
717 
718 /*
719   This only works correctly for square matrices where the subblock A->A is the
720    diagonal block
721 */
722 #undef __FUNCT__
723 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
724 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
725 {
726   PetscErrorCode ierr;
727   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
728 
729   PetscFunctionBegin;
730   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
731   if (a->rstart != a->cstart || a->rend != a->cend) {
732     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
733   }
734   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatScale_MPIAIJ"
740 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
741 {
742   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
743   PetscErrorCode ierr;
744 
745   PetscFunctionBegin;
746   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
747   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "MatDestroy_MPIAIJ"
753 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
754 {
755   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
756   PetscErrorCode ierr;
757 
758   PetscFunctionBegin;
759 #if defined(PETSC_USE_LOG)
760   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
761 #endif
762   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
763   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
764   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
765   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
766 #if defined (PETSC_USE_CTABLE)
767   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
768 #else
769   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
770 #endif
771   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
772   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
773   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
774   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
775   ierr = PetscFree(aij);CHKERRQ(ierr);
776 
777   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
778   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
779   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
780   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
781   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
782   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
783   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
784   PetscFunctionReturn(0);
785 }
786 
787 #undef __FUNCT__
788 #define __FUNCT__ "MatView_MPIAIJ_Binary"
789 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
790 {
791   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
792   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
793   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
794   PetscErrorCode    ierr;
795   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
796   int               fd;
797   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
798   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
799   PetscScalar       *column_values;
800 
801   PetscFunctionBegin;
802   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
803   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
804   nz   = A->nz + B->nz;
805   if (!rank) {
806     header[0] = MAT_FILE_COOKIE;
807     header[1] = mat->M;
808     header[2] = mat->N;
809     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
810     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
811     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
812     /* get largest number of rows any processor has */
813     rlen = mat->m;
814     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
815     for (i=1; i<size; i++) {
816       rlen = PetscMax(rlen,range[i+1] - range[i]);
817     }
818   } else {
819     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
820     rlen = mat->m;
821   }
822 
823   /* load up the local row counts */
824   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
825   for (i=0; i<mat->m; i++) {
826     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
827   }
828 
829   /* store the row lengths to the file */
830   if (!rank) {
831     MPI_Status status;
832     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
833     for (i=1; i<size; i++) {
834       rlen = range[i+1] - range[i];
835       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
836       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
837     }
838   } else {
839     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
840   }
841   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
842 
843   /* load up the local column indices */
844   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
845   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
846   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
847   cnt  = 0;
848   for (i=0; i<mat->m; i++) {
849     for (j=B->i[i]; j<B->i[i+1]; j++) {
850       if ( (col = garray[B->j[j]]) > cstart) break;
851       column_indices[cnt++] = col;
852     }
853     for (k=A->i[i]; k<A->i[i+1]; k++) {
854       column_indices[cnt++] = A->j[k] + cstart;
855     }
856     for (; j<B->i[i+1]; j++) {
857       column_indices[cnt++] = garray[B->j[j]];
858     }
859   }
860   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
861 
862   /* store the column indices to the file */
863   if (!rank) {
864     MPI_Status status;
865     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
866     for (i=1; i<size; i++) {
867       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
868       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
869       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
870       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
871     }
872   } else {
873     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
874     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
875   }
876   ierr = PetscFree(column_indices);CHKERRQ(ierr);
877 
878   /* load up the local column values */
879   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
880   cnt  = 0;
881   for (i=0; i<mat->m; i++) {
882     for (j=B->i[i]; j<B->i[i+1]; j++) {
883       if ( garray[B->j[j]] > cstart) break;
884       column_values[cnt++] = B->a[j];
885     }
886     for (k=A->i[i]; k<A->i[i+1]; k++) {
887       column_values[cnt++] = A->a[k];
888     }
889     for (; j<B->i[i+1]; j++) {
890       column_values[cnt++] = B->a[j];
891     }
892   }
893   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
894 
895   /* store the column values to the file */
896   if (!rank) {
897     MPI_Status status;
898     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
899     for (i=1; i<size; i++) {
900       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
901       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
902       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
903       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
904     }
905   } else {
906     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
907     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
908   }
909   ierr = PetscFree(column_values);CHKERRQ(ierr);
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNCT__
914 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
915 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
916 {
917   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
918   PetscErrorCode    ierr;
919   PetscMPIInt       rank = aij->rank,size = aij->size;
920   PetscTruth        isdraw,iascii,isbinary;
921   PetscViewer       sviewer;
922   PetscViewerFormat format;
923 
924   PetscFunctionBegin;
925   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
926   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
927   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
928   if (iascii) {
929     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
930     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
931       MatInfo    info;
932       PetscTruth inodes;
933 
934       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
935       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
936       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
937       if (!inodes) {
938         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
939 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
940       } else {
941         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
942 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
943       }
944       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
945       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
946       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
947       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
948       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
949       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
950       PetscFunctionReturn(0);
951     } else if (format == PETSC_VIEWER_ASCII_INFO) {
952       PetscInt   inodecount,inodelimit,*inodes;
953       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
954       if (inodes) {
955         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
956       } else {
957         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
958       }
959       PetscFunctionReturn(0);
960     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
961       PetscFunctionReturn(0);
962     }
963   } else if (isbinary) {
964     if (size == 1) {
965       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
966       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
967     } else {
968       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
969     }
970     PetscFunctionReturn(0);
971   } else if (isdraw) {
972     PetscDraw  draw;
973     PetscTruth isnull;
974     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
975     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
976   }
977 
978   if (size == 1) {
979     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
980     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
981   } else {
982     /* assemble the entire matrix onto first processor. */
983     Mat         A;
984     Mat_SeqAIJ  *Aloc;
985     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
986     PetscScalar *a;
987 
988     if (!rank) {
989       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
990     } else {
991       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
992     }
993     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
994     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
995     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
996     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
997 
998     /* copy over the A part */
999     Aloc = (Mat_SeqAIJ*)aij->A->data;
1000     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1001     row = aij->rstart;
1002     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
1003     for (i=0; i<m; i++) {
1004       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1005       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1006     }
1007     aj = Aloc->j;
1008     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
1009 
1010     /* copy over the B part */
1011     Aloc = (Mat_SeqAIJ*)aij->B->data;
1012     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1013     row  = aij->rstart;
1014     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1015     ct   = cols;
1016     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1017     for (i=0; i<m; i++) {
1018       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1019       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1020     }
1021     ierr = PetscFree(ct);CHKERRQ(ierr);
1022     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1023     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1024     /*
1025        Everyone has to call to draw the matrix since the graphics waits are
1026        synchronized across all processors that share the PetscDraw object
1027     */
1028     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1029     if (!rank) {
1030       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1031       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1032     }
1033     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1034     ierr = MatDestroy(A);CHKERRQ(ierr);
1035   }
1036   PetscFunctionReturn(0);
1037 }
1038 
1039 #undef __FUNCT__
1040 #define __FUNCT__ "MatView_MPIAIJ"
1041 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1042 {
1043   PetscErrorCode ierr;
1044   PetscTruth     iascii,isdraw,issocket,isbinary;
1045 
1046   PetscFunctionBegin;
1047   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1048   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1050   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1051   if (iascii || isdraw || isbinary || issocket) {
1052     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1053   } else {
1054     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1055   }
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 
1060 
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatRelax_MPIAIJ"
1063 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1064 {
1065   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1066   PetscErrorCode ierr;
1067   Vec            bb1;
1068   PetscScalar    mone=-1.0;
1069 
1070   PetscFunctionBegin;
1071   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1072 
1073   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1074 
1075   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1076     if (flag & SOR_ZERO_INITIAL_GUESS) {
1077       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1078       its--;
1079     }
1080 
1081     while (its--) {
1082       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1083       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1084 
1085       /* update rhs: bb1 = bb - B*x */
1086       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1087       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1088 
1089       /* local sweep */
1090       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1091       CHKERRQ(ierr);
1092     }
1093   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1094     if (flag & SOR_ZERO_INITIAL_GUESS) {
1095       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1096       its--;
1097     }
1098     while (its--) {
1099       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1100       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1101 
1102       /* update rhs: bb1 = bb - B*x */
1103       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1104       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1105 
1106       /* local sweep */
1107       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1108       CHKERRQ(ierr);
1109     }
1110   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1111     if (flag & SOR_ZERO_INITIAL_GUESS) {
1112       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1113       its--;
1114     }
1115     while (its--) {
1116       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1117       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1118 
1119       /* update rhs: bb1 = bb - B*x */
1120       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1121       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1122 
1123       /* local sweep */
1124       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1125       CHKERRQ(ierr);
1126     }
1127   } else {
1128     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1129   }
1130 
1131   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1137 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1138 {
1139   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1140   Mat            A = mat->A,B = mat->B;
1141   PetscErrorCode ierr;
1142   PetscReal      isend[5],irecv[5];
1143 
1144   PetscFunctionBegin;
1145   info->block_size     = 1.0;
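  /* sum the local counts of the diagonal (A) and off-diagonal (B) blocks into
     isend[], then reduce across the communicator as requested by flag */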
1146   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1147   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1148   isend[3] = info->memory;  isend[4] = info->mallocs;
1149   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1150   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1151   isend[3] += info->memory;  isend[4] += info->mallocs;
1152   if (flag == MAT_LOCAL) {
1153     info->nz_used      = isend[0];
1154     info->nz_allocated = isend[1];
1155     info->nz_unneeded  = isend[2];
1156     info->memory       = isend[3];
1157     info->mallocs      = isend[4];
1158   } else if (flag == MAT_GLOBAL_MAX) {
1159     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1160     info->nz_used      = irecv[0];
1161     info->nz_allocated = irecv[1];
1162     info->nz_unneeded  = irecv[2];
1163     info->memory       = irecv[3];
1164     info->mallocs      = irecv[4];
1165   } else if (flag == MAT_GLOBAL_SUM) {
1166     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1167     info->nz_used      = irecv[0];
1168     info->nz_allocated = irecv[1];
1169     info->nz_unneeded  = irecv[2];
1170     info->memory       = irecv[3];
1171     info->mallocs      = irecv[4];
1172   }
1173   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1174   info->fill_ratio_needed = 0;
1175   info->factor_mallocs    = 0;
1176   info->rows_global       = (double)matin->M;
1177   info->columns_global    = (double)matin->N;
1178   info->rows_local        = (double)matin->m;
1179   info->columns_local     = (double)matin->N;
1180 
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatSetOption_MPIAIJ"
1186 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1187 {
1188   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192   switch (op) {
1193   case MAT_NO_NEW_NONZERO_LOCATIONS:
1194   case MAT_YES_NEW_NONZERO_LOCATIONS:
1195   case MAT_COLUMNS_UNSORTED:
1196   case MAT_COLUMNS_SORTED:
1197   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1198   case MAT_KEEP_ZEROED_ROWS:
1199   case MAT_NEW_NONZERO_LOCATION_ERR:
1200   case MAT_USE_INODES:
1201   case MAT_DO_NOT_USE_INODES:
1202   case MAT_IGNORE_ZERO_ENTRIES:
1203     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1204     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1205     break;
1206   case MAT_ROW_ORIENTED:
1207     a->roworiented = PETSC_TRUE;
1208     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1209     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1210     break;
1211   case MAT_ROWS_SORTED:
1212   case MAT_ROWS_UNSORTED:
1213   case MAT_YES_NEW_DIAGONALS:
1214     ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
1215     break;
1216   case MAT_COLUMN_ORIENTED:
1217     a->roworiented = PETSC_FALSE;
1218     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1219     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1220     break;
1221   case MAT_IGNORE_OFF_PROC_ENTRIES:
1222     a->donotstash = PETSC_TRUE;
1223     break;
1224   case MAT_NO_NEW_DIAGONALS:
1225     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1226   case MAT_SYMMETRIC:
1227   case MAT_STRUCTURALLY_SYMMETRIC:
1228   case MAT_HERMITIAN:
1229   case MAT_SYMMETRY_ETERNAL:
1230     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1231     break;
1232   case MAT_NOT_SYMMETRIC:
1233   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1234   case MAT_NOT_HERMITIAN:
1235   case MAT_NOT_SYMMETRY_ETERNAL:
1236     break;
1237   default:
1238     SETERRQ(PETSC_ERR_SUP,"unknown option");
1239   }
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatGetRow_MPIAIJ"
1245 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1246 {
1247   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1248   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1249   PetscErrorCode ierr;
1250   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1251   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1252   PetscInt       *cmap,*idx_p;
1253 
1254   PetscFunctionBegin;
1255   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1256   mat->getrowactive = PETSC_TRUE;
1257 
1258   if (!mat->rowvalues && (idx || v)) {
1259     /*
1260         allocate enough space to hold information from the longest row.
1261     */
1262     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1263     PetscInt     max = 1,tmp;
1264     for (i=0; i<matin->m; i++) {
1265       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1266       if (max < tmp) { max = tmp; }
1267     }
1268     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1269     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1270   }
1271 
1272   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1273   lrow = row - rstart;
1274 
1275   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1276   if (!v)   {pvA = 0; pvB = 0;}
1277   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1278   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1279   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1280   nztot = nzA + nzB;
1281 
1282   cmap  = mat->garray;
1283   if (v  || idx) {
1284     if (nztot) {
1285       /* Sort by increasing column numbers, assuming A and B already sorted */
1286       PetscInt imark = -1;
1287       if (v) {
1288         *v = v_p = mat->rowvalues;
1289         for (i=0; i<nzB; i++) {
1290           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1291           else break;
1292         }
1293         imark = i;
1294         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1295         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1296       }
1297       if (idx) {
1298         *idx = idx_p = mat->rowindices;
1299         if (imark > -1) {
1300           for (i=0; i<imark; i++) {
1301             idx_p[i] = cmap[cworkB[i]];
1302           }
1303         } else {
1304           for (i=0; i<nzB; i++) {
1305             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1306             else break;
1307           }
1308           imark = i;
1309         }
1310         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1311         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1312       }
1313     } else {
1314       if (idx) *idx = 0;
1315       if (v)   *v   = 0;
1316     }
1317   }
1318   *nz = nztot;
1319   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1320   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1326 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1327 {
1328   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1329 
1330   PetscFunctionBegin;
1331   if (!aij->getrowactive) {
1332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1333   }
1334   aij->getrowactive = PETSC_FALSE;
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatNorm_MPIAIJ"
1340 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1341 {
1342   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1343   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1344   PetscErrorCode ierr;
1345   PetscInt       i,j,cstart = aij->cstart;
1346   PetscReal      sum = 0.0;
1347   PetscScalar    *v;
1348 
1349   PetscFunctionBegin;
1350   if (aij->size == 1) {
1351     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1352   } else {
1353     if (type == NORM_FROBENIUS) {
1354       v = amat->a;
1355       for (i=0; i<amat->nz; i++) {
1356 #if defined(PETSC_USE_COMPLEX)
1357         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1358 #else
1359         sum += (*v)*(*v); v++;
1360 #endif
1361       }
1362       v = bmat->a;
1363       for (i=0; i<bmat->nz; i++) {
1364 #if defined(PETSC_USE_COMPLEX)
1365         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1366 #else
1367         sum += (*v)*(*v); v++;
1368 #endif
1369       }
1370       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1371       *norm = sqrt(*norm);
1372     } else if (type == NORM_1) { /* max column norm */
1373       PetscReal *tmp,*tmp2;
1374       PetscInt    *jj,*garray = aij->garray;
1375       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1376       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1377       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1378       *norm = 0.0;
1379       v = amat->a; jj = amat->j;
1380       for (j=0; j<amat->nz; j++) {
1381         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1382       }
1383       v = bmat->a; jj = bmat->j;
1384       for (j=0; j<bmat->nz; j++) {
1385         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1386       }
1387       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1388       for (j=0; j<mat->N; j++) {
1389         if (tmp2[j] > *norm) *norm = tmp2[j];
1390       }
1391       ierr = PetscFree(tmp);CHKERRQ(ierr);
1392       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1393     } else if (type == NORM_INFINITY) { /* max row norm */
1394       PetscReal ntemp = 0.0;
1395       for (j=0; j<aij->A->m; j++) {
1396         v = amat->a + amat->i[j];
1397         sum = 0.0;
1398         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1399           sum += PetscAbsScalar(*v); v++;
1400         }
1401         v = bmat->a + bmat->i[j];
1402         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1403           sum += PetscAbsScalar(*v); v++;
1404         }
1405         if (sum > ntemp) ntemp = sum;
1406       }
1407       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1408     } else {
1409       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1410     }
1411   }
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatTranspose_MPIAIJ"
1417 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1418 {
1419   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1420   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1421   PetscErrorCode ierr;
1422   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1423   Mat            B;
1424   PetscScalar    *array;
1425 
1426   PetscFunctionBegin;
1427   if (!matout && M != N) {
1428     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1429   }
1430 
1431   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1432   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1433   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1434 
1435   /* copy over the A part */
1436   Aloc = (Mat_SeqAIJ*)a->A->data;
1437   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1438   row = a->rstart;
1439   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1440   for (i=0; i<m; i++) {
1441     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1442     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1443   }
1444   aj = Aloc->j;
1445   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1446 
1447   /* copy over the B part */
1448   Aloc = (Mat_SeqAIJ*)a->B->data;
1449   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1450   row  = a->rstart;
1451   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1452   ct   = cols;
1453   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1454   for (i=0; i<m; i++) {
1455     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1456     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1457   }
1458   ierr = PetscFree(ct);CHKERRQ(ierr);
1459   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1460   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1461   if (matout) {
1462     *matout = B;
1463   } else {
1464     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1465   }
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1471 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1472 {
1473   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1474   Mat            a = aij->A,b = aij->B;
1475   PetscErrorCode ierr;
1476   PetscInt       s1,s2,s3;
1477 
1478   PetscFunctionBegin;
1479   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1480   if (rr) {
1481     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1482     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1483     /* Overlap communication with computation. */
1484     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1485   }
1486   if (ll) {
1487     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1488     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1489     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1490   }
1491   /* scale the diagonal block */
1492   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1493 
1494   if (rr) {
1495     /* Do a scatter end and then right scale the off-diagonal block */
1496     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1497     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1498   }
1499 
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1506 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1507 {
1508   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1509   PetscErrorCode ierr;
1510 
1511   PetscFunctionBegin;
1512   if (!a->rank) {
1513     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1514   }
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1520 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1521 {
1522   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1523   PetscErrorCode ierr;
1524 
1525   PetscFunctionBegin;
1526   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1527   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1528   PetscFunctionReturn(0);
1529 }
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1532 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1533 {
1534   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1535   PetscErrorCode ierr;
1536 
1537   PetscFunctionBegin;
1538   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatEqual_MPIAIJ"
1544 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1545 {
1546   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1547   Mat            a,b,c,d;
1548   PetscTruth     flg;
1549   PetscErrorCode ierr;
1550 
1551   PetscFunctionBegin;
1552   a = matA->A; b = matA->B;
1553   c = matB->A; d = matB->B;
1554 
1555   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1556   if (flg) {
1557     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1558   }
1559   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatCopy_MPIAIJ"
1565 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1566 {
1567   PetscErrorCode ierr;
1568   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1569   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1570 
1571   PetscFunctionBegin;
1572   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1573   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1574     /* Because of the column compression in the off-processor part of the matrix a->B,
1575        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
1576        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
1577        could be provided by first uncompressing the a->B matrices and then copying
1578        the submatrices */
1579     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1580   } else {
1581     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1582     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1583   }
1584   PetscFunctionReturn(0);
1585 }
1586 
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1589 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1590 {
1591   PetscErrorCode ierr;
1592 
1593   PetscFunctionBegin;
1594   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 #include "petscblaslapack.h"
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatAXPY_MPIAIJ"
1601 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1602 {
1603   PetscErrorCode ierr;
1604   PetscInt       i;
1605   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1606   PetscBLASInt   bnz,one=1;
1607   Mat_SeqAIJ     *x,*y;
1608 
1609   PetscFunctionBegin;
1610   if (str == SAME_NONZERO_PATTERN) {
1611     x = (Mat_SeqAIJ *)xx->A->data;
1612     y = (Mat_SeqAIJ *)yy->A->data;
1613     bnz = (PetscBLASInt)x->nz;
1614     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1615     x = (Mat_SeqAIJ *)xx->B->data;
1616     y = (Mat_SeqAIJ *)yy->B->data;
1617     bnz = (PetscBLASInt)x->nz;
1618     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1619   } else if (str == SUBSET_NONZERO_PATTERN) {
1620     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1621 
1622     x = (Mat_SeqAIJ *)xx->B->data;
1623     y = (Mat_SeqAIJ *)yy->B->data;
1624     if (y->xtoy && y->XtoY != xx->B) {
1625       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1626       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1627     }
1628     if (!y->xtoy) { /* get xtoy */
1629       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1630       y->XtoY = xx->B;
1631     }
1632     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1633   } else {
1634     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1635   }
1636   PetscFunctionReturn(0);
1637 }
1638 
1639 /* -------------------------------------------------------------------*/
1640 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1641        MatGetRow_MPIAIJ,
1642        MatRestoreRow_MPIAIJ,
1643        MatMult_MPIAIJ,
1644 /* 4*/ MatMultAdd_MPIAIJ,
1645        MatMultTranspose_MPIAIJ,
1646        MatMultTransposeAdd_MPIAIJ,
1647        0,
1648        0,
1649        0,
1650 /*10*/ 0,
1651        0,
1652        0,
1653        MatRelax_MPIAIJ,
1654        MatTranspose_MPIAIJ,
1655 /*15*/ MatGetInfo_MPIAIJ,
1656        MatEqual_MPIAIJ,
1657        MatGetDiagonal_MPIAIJ,
1658        MatDiagonalScale_MPIAIJ,
1659        MatNorm_MPIAIJ,
1660 /*20*/ MatAssemblyBegin_MPIAIJ,
1661        MatAssemblyEnd_MPIAIJ,
1662        0,
1663        MatSetOption_MPIAIJ,
1664        MatZeroEntries_MPIAIJ,
1665 /*25*/ MatZeroRows_MPIAIJ,
1666        0,
1667        0,
1668        0,
1669        0,
1670 /*30*/ MatSetUpPreallocation_MPIAIJ,
1671        0,
1672        0,
1673        0,
1674        0,
1675 /*35*/ MatDuplicate_MPIAIJ,
1676        0,
1677        0,
1678        0,
1679        0,
1680 /*40*/ MatAXPY_MPIAIJ,
1681        MatGetSubMatrices_MPIAIJ,
1682        MatIncreaseOverlap_MPIAIJ,
1683        MatGetValues_MPIAIJ,
1684        MatCopy_MPIAIJ,
1685 /*45*/ MatPrintHelp_MPIAIJ,
1686        MatScale_MPIAIJ,
1687        0,
1688        0,
1689        0,
1690 /*50*/ MatSetBlockSize_MPIAIJ,
1691        0,
1692        0,
1693        0,
1694        0,
1695 /*55*/ MatFDColoringCreate_MPIAIJ,
1696        0,
1697        MatSetUnfactored_MPIAIJ,
1698        0,
1699        0,
1700 /*60*/ MatGetSubMatrix_MPIAIJ,
1701        MatDestroy_MPIAIJ,
1702        MatView_MPIAIJ,
1703        MatGetPetscMaps_Petsc,
1704        0,
1705 /*65*/ 0,
1706        0,
1707        0,
1708        0,
1709        0,
1710 /*70*/ 0,
1711        0,
1712        MatSetColoring_MPIAIJ,
1713 #if defined(PETSC_HAVE_ADIC)
1714        MatSetValuesAdic_MPIAIJ,
1715 #else
1716        0,
1717 #endif
1718        MatSetValuesAdifor_MPIAIJ,
1719 /*75*/ 0,
1720        0,
1721        0,
1722        0,
1723        0,
1724 /*80*/ 0,
1725        0,
1726        0,
1727        0,
1728 /*84*/ MatLoad_MPIAIJ,
1729        0,
1730        0,
1731        0,
1732        0,
1733        0,
1734 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1735        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1736        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1737        MatPtAP_Basic,
1738        MatPtAPSymbolic_MPIAIJ,
1739 /*95*/ MatPtAPNumeric_MPIAIJ,
1740        0,
1741        0,
1742        0,
1743        0,
1744 /*100*/0,
1745        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1746        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1747 };
1748 
1749 /* ----------------------------------------------------------------------------------------*/
1750 
1751 EXTERN_C_BEGIN
1752 #undef __FUNCT__
1753 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1754 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1755 {
1756   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1757   PetscErrorCode ierr;
1758 
1759   PetscFunctionBegin;
1760   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1761   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1762   PetscFunctionReturn(0);
1763 }
1764 EXTERN_C_END
1765 
1766 EXTERN_C_BEGIN
1767 #undef __FUNCT__
1768 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1769 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1770 {
1771   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1772   PetscErrorCode ierr;
1773 
1774   PetscFunctionBegin;
1775   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1776   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 EXTERN_C_END
1780 
1781 #include "petscpc.h"
1782 EXTERN_C_BEGIN
1783 #undef __FUNCT__
1784 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1785 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1786 {
1787   Mat_MPIAIJ     *b;
1788   PetscErrorCode ierr;
1789   PetscInt       i;
1790 
1791   PetscFunctionBegin;
1792   B->preallocated = PETSC_TRUE;
1793   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1794   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1795   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1796   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1797   if (d_nnz) {
1798     for (i=0; i<B->m; i++) {
1799       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1800     }
1801   }
1802   if (o_nnz) {
1803     for (i=0; i<B->m; i++) {
1804       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1805     }
1806   }
1807   b = (Mat_MPIAIJ*)B->data;
1808   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1809   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1810 
1811   PetscFunctionReturn(0);
1812 }
1813 EXTERN_C_END
1814 
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1817 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1818 {
1819   Mat            mat;
1820   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1821   PetscErrorCode ierr;
1822 
1823   PetscFunctionBegin;
1824   *newmat       = 0;
1825   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1826   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1827   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1828   a    = (Mat_MPIAIJ*)mat->data;
1829 
1830   mat->factor       = matin->factor;
1831   mat->bs           = matin->bs;
1832   mat->assembled    = PETSC_TRUE;
1833   mat->insertmode   = NOT_SET_VALUES;
1834   mat->preallocated = PETSC_TRUE;
1835 
1836   a->rstart       = oldmat->rstart;
1837   a->rend         = oldmat->rend;
1838   a->cstart       = oldmat->cstart;
1839   a->cend         = oldmat->cend;
1840   a->size         = oldmat->size;
1841   a->rank         = oldmat->rank;
1842   a->donotstash   = oldmat->donotstash;
1843   a->roworiented  = oldmat->roworiented;
1844   a->rowindices   = 0;
1845   a->rowvalues    = 0;
1846   a->getrowactive = PETSC_FALSE;
1847 
1848   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1849   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1850   if (oldmat->colmap) {
1851 #if defined (PETSC_USE_CTABLE)
1852     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1853 #else
1854     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1855     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1856     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1857 #endif
1858   } else a->colmap = 0;
1859   if (oldmat->garray) {
1860     PetscInt len;
1861     len  = oldmat->B->n;
1862     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1863     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1864     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1865   } else a->garray = 0;
1866 
1867   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1868   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1869   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1870   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1871   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1872   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1873   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1874   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1875   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1876   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1877   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1878   *newmat = mat;
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 #include "petscsys.h"
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatLoad_MPIAIJ"
1886 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1887 {
1888   Mat            A;
1889   PetscScalar    *vals,*svals;
1890   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1891   MPI_Status     status;
1892   PetscErrorCode ierr;
1893   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1894   PetscInt       i,nz,j,rstart,rend,mmax;
1895   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1896   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1897   PetscInt       cend,cstart,n,*rowners;
1898   int            fd;
1899 
1900   PetscFunctionBegin;
1901   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1902   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1903   if (!rank) {
1904     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1905     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1906     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1907   }
1908 
1909   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1910   M = header[1]; N = header[2];
1911   /* determine ownership of all rows */
1912   m    = M/size + ((M % size) > rank);
1913   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1914   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1915 
1916   /* First process needs enough room for the process with the most rows */
1917   if (!rank) {
1918     mmax       = rowners[1];
1919     for (i=2; i<size; i++) {
1920       mmax = PetscMax(mmax,rowners[i]);
1921     }
1922   } else mmax = m;
1923 
1924   rowners[0] = 0;
1925   for (i=2; i<=size; i++) {
1926     mmax       = PetscMax(mmax,rowners[i]);
1927     rowners[i] += rowners[i-1];
1928   }
1929   rstart = rowners[rank];
1930   rend   = rowners[rank+1];
1931 
1932   /* distribute row lengths to all processors */
1933   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1934   if (!rank) {
1935     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1936     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1937     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1938     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1939     for (j=0; j<m; j++) {
1940       procsnz[0] += ourlens[j];
1941     }
1942     for (i=1; i<size; i++) {
1943       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1944       /* calculate the number of nonzeros on each processor */
1945       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1946         procsnz[i] += rowlengths[j];
1947       }
1948       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1949     }
1950     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1951   } else {
1952     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1953   }
1954 
1955   if (!rank) {
1956     /* determine max buffer needed and allocate it */
1957     maxnz = 0;
1958     for (i=0; i<size; i++) {
1959       maxnz = PetscMax(maxnz,procsnz[i]);
1960     }
1961     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1962 
1963     /* read in my part of the matrix column indices  */
1964     nz   = procsnz[0];
1965     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1966     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1967 
1968     /* read in everyone else's parts and ship them off */
1969     for (i=1; i<size; i++) {
1970       nz   = procsnz[i];
1971       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1972       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1973     }
1974     ierr = PetscFree(cols);CHKERRQ(ierr);
1975   } else {
1976     /* determine buffer space needed for message */
1977     nz = 0;
1978     for (i=0; i<m; i++) {
1979       nz += ourlens[i];
1980     }
1981     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1982 
1983     /* receive message of column indices */
1984     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1985     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1986     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1987   }
1988 
1989   /* determine column ownership if matrix is not square */
1990   if (N != M) {
1991     n      = N/size + ((N % size) > rank);
1992     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1993     cstart = cend - n;
1994   } else {
1995     cstart = rstart;
1996     cend   = rend;
1997     n      = cend - cstart;
1998   }
1999 
2000   /* loop over local rows, determining the number of off-diagonal entries */
2001   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2002   jj = 0;
2003   for (i=0; i<m; i++) {
2004     for (j=0; j<ourlens[i]; j++) {
2005       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2006       jj++;
2007     }
2008   }
2009 
2010   /* create our matrix */
2011   for (i=0; i<m; i++) {
2012     ourlens[i] -= offlens[i];
2013   }
2014   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2015   ierr = MatSetType(A,type);CHKERRQ(ierr);
2016   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2017 
2018   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2019   for (i=0; i<m; i++) {
2020     ourlens[i] += offlens[i];
2021   }
2022 
2023   if (!rank) {
2024     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2025 
2026     /* read in my part of the matrix numerical values  */
2027     nz   = procsnz[0];
2028     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2029 
2030     /* insert into matrix */
2031     jj      = rstart;
2032     smycols = mycols;
2033     svals   = vals;
2034     for (i=0; i<m; i++) {
2035       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2036       smycols += ourlens[i];
2037       svals   += ourlens[i];
2038       jj++;
2039     }
2040 
2041     /* read in other processors and ship out */
2042     for (i=1; i<size; i++) {
2043       nz   = procsnz[i];
2044       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2045       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2046     }
2047     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2048   } else {
2049     /* receive numeric values */
2050     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2051 
2052     /* receive message of values */
2053     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2054     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2055     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2056 
2057     /* insert into matrix */
2058     jj      = rstart;
2059     smycols = mycols;
2060     svals   = vals;
2061     for (i=0; i<m; i++) {
2062       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2063       smycols += ourlens[i];
2064       svals   += ourlens[i];
2065       jj++;
2066     }
2067   }
2068   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2069   ierr = PetscFree(vals);CHKERRQ(ierr);
2070   ierr = PetscFree(mycols);CHKERRQ(ierr);
2071   ierr = PetscFree(rowners);CHKERRQ(ierr);
2072 
2073   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2074   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2075   *newmat = A;
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2081 /*
2082     Not great since it makes two copies of the submatrix: first a local SeqAIJ copy,
2083   and then the end result obtained by concatenating the local matrices.
2084   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2085 */
2086 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2087 {
2088   PetscErrorCode ierr;
2089   PetscMPIInt    rank,size;
2090   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2091   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2092   Mat            *local,M,Mreuse;
2093   PetscScalar    *vwork,*aa;
2094   MPI_Comm       comm = mat->comm;
2095   Mat_SeqAIJ     *aij;
2096 
2097 
2098   PetscFunctionBegin;
2099   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2100   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2101 
2102   if (call ==  MAT_REUSE_MATRIX) {
2103     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2104     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2105     local = &Mreuse;
2106     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2107   } else {
2108     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2109     Mreuse = *local;
2110     ierr   = PetscFree(local);CHKERRQ(ierr);
2111   }
2112 
2113   /*
2114       m - number of local rows
2115       n - number of columns (same on all processors)
2116       rstart - first row in new global matrix generated
2117   */
2118   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2119   if (call == MAT_INITIAL_MATRIX) {
2120     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2121     ii  = aij->i;
2122     jj  = aij->j;
2123 
2124     /*
2125         Determine the number of non-zeros in the diagonal and off-diagonal
2126         portions of the matrix in order to do correct preallocation
2127     */
2128 
2129     /* first get start and end of "diagonal" columns */
2130     if (csize == PETSC_DECIDE) {
2131       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2132       if (mglobal == n) { /* square matrix */
2133         nlocal = m;
2134       } else {
2135         nlocal = n/size + ((n % size) > rank);
2136       }
2137     } else {
2138       nlocal = csize;
2139     }
2140     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2141     rstart = rend - nlocal;
2142     if (rank == size - 1 && rend != n) {
2143       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2144     }
2145 
2146     /* next, compute all the lengths */
2147     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2148     olens = dlens + m;
2149     for (i=0; i<m; i++) {
2150       jend = ii[i+1] - ii[i];
2151       olen = 0;
2152       dlen = 0;
2153       for (j=0; j<jend; j++) {
2154         if (*jj < rstart || *jj >= rend) olen++;
2155         else dlen++;
2156         jj++;
2157       }
2158       olens[i] = olen;
2159       dlens[i] = dlen;
2160     }
2161     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2162     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2163     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2164     ierr = PetscFree(dlens);CHKERRQ(ierr);
2165   } else {
2166     PetscInt ml,nl;
2167 
2168     M = *newmat;
2169     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2170     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2171     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2172     /*
2173          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2174        rather than the slower MatSetValues().
2175     */
2176     M->was_assembled = PETSC_TRUE;
2177     M->assembled     = PETSC_FALSE;
2178   }
2179   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2180   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2181   ii  = aij->i;
2182   jj  = aij->j;
2183   aa  = aij->a;
2184   for (i=0; i<m; i++) {
2185     row   = rstart + i;
2186     nz    = ii[i+1] - ii[i];
2187     cwork = jj;     jj += nz;
2188     vwork = aa;     aa += nz;
2189     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2190   }
2191 
2192   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2193   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2194   *newmat = M;
2195 
2196   /* save the submatrix used on this processor for the next request */
2197   if (call ==  MAT_INITIAL_MATRIX) {
2198     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2199     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2200   }
2201 
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 EXTERN_C_BEGIN
2206 #undef __FUNCT__
2207 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2208 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2209 {
2210   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2211   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2212   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2213   const PetscInt *JJ;
2214   PetscScalar    *values;
2215   PetscErrorCode ierr;
2216 
2217   PetscFunctionBegin;
2218 #if defined(PETSC_OPT_g)
2219   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2220 #endif
2221   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2222   o_nnz = d_nnz + m;
2223 
2224   for (i=0; i<m; i++) {
2225     nnz     = I[i+1]- I[i];
2226     JJ      = J + I[i];
2227     nnz_max = PetscMax(nnz_max,nnz);
2228 #if defined(PETSC_OPT_g)
2229     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2230 #endif
2231     for (j=0; j<nnz; j++) {
2232       if (*JJ >= cstart) break;
2233       JJ++;
2234     }
2235     d = 0;
2236     for (; j<nnz; j++) {
2237       if (*JJ++ >= cend) break;
2238       d++;
2239     }
2240     d_nnz[i] = d;
2241     o_nnz[i] = nnz - d;
2242   }
2243   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2244   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2245 
2246   if (v) values = (PetscScalar*)v;
2247   else {
2248     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2249     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2250   }
2251 
2252   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2253   for (i=0; i<m; i++) {
2254     ii   = i + rstart;
2255     nnz  = I[i+1]- I[i];
2256     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2257   }
2258   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2259   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2260   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2261 
2262   if (!v) {
2263     ierr = PetscFree(values);CHKERRQ(ierr);
2264   }
2265   PetscFunctionReturn(0);
2266 }
2267 EXTERN_C_END
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2271 /*@C
2272    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2273    (the default parallel PETSc format).
2274 
2275    Collective on MPI_Comm
2276 
2277    Input Parameters:
2278 +  A - the matrix
2279 .  i - the indices into j for the start of each local row (starts with zero)
2280 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2281 -  v - optional values in the matrix
2282 
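   Example usage (a minimal sketch; it assumes B is an MPIAIJ matrix whose two local rows
   match the CSR arrays below, and that the column indices are global and sorted per row):

.vb
   PetscInt    i[] = {0,2,4};            /* row pointers for 2 local rows */
   PetscInt    j[] = {0,5,1,6};          /* global column indices, sorted within each row */
   PetscScalar v[] = {1.0,2.0,3.0,4.0};  /* optional numerical values */
   ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
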
2283    Level: developer
2284 
2285 .keywords: matrix, aij, compressed row, sparse, parallel
2286 
2287 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2288 @*/
2289 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2290 {
2291   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2292 
2293   PetscFunctionBegin;
2294   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2295   if (f) {
2296     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2297   }
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 #undef __FUNCT__
2302 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2303 /*@C
2304    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2305    (the default parallel PETSc format).  For good matrix assembly performance
2306    the user should preallocate the matrix storage by setting the parameters
2307    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2308    performance can be increased by more than a factor of 50.
2309 
2310    Collective on MPI_Comm
2311 
2312    Input Parameters:
2313 +  A - the matrix
2314 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2315            (same value is used for all local rows)
2316 .  d_nnz - array containing the number of nonzeros in the various rows of the
2317            DIAGONAL portion of the local submatrix (possibly different for each row)
2318            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2319            The size of this array is equal to the number of local rows, i.e. 'm'.
2320            You must leave room for the diagonal entry even if it is zero.
2321 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2322            submatrix (same value is used for all local rows).
2323 -  o_nnz - array containing the number of nonzeros in the various rows of the
2324            OFF-DIAGONAL portion of the local submatrix (possibly different for
2325            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2326            structure. The size of this array is equal to the number
2327            of local rows, i.e. 'm'.
2328 
2329    If the *_nnz parameter is given then the *_nz parameter is ignored
2330 
2331    The AIJ format (also called the Yale sparse matrix format or
2332    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2333    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2334 
2335    The parallel matrix is partitioned such that the first m0 rows belong to
2336    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2337    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
2338 
2339    The DIAGONAL portion of the local submatrix of a processor can be defined
2340    as the submatrix which is obtained by extracting the part corresponding
2341    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2342    first row that belongs to the processor, and r2 is the last row belonging
2343    to this processor. This is a square mxm matrix. The remaining portion
2344    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2345 
2346    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2347 
2348    Example usage:
2349 
2350    Consider the following 8x8 matrix with 34 non-zero values, that is
2351    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2352    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2353    as follows:
2354 
2355 .vb
2356             1  2  0  |  0  3  0  |  0  4
2357     Proc0   0  5  6  |  7  0  0  |  8  0
2358             9  0 10  | 11  0  0  | 12  0
2359     -------------------------------------
2360            13  0 14  | 15 16 17  |  0  0
2361     Proc1   0 18  0  | 19 20 21  |  0  0
2362             0  0  0  | 22 23  0  | 24  0
2363     -------------------------------------
2364     Proc2  25 26 27  |  0  0 28  | 29  0
2365            30  0  0  | 31 32 33  |  0 34
2366 .ve
2367 
2368    This can be represented as a collection of submatrices as:
2369 
2370 .vb
2371       A B C
2372       D E F
2373       G H I
2374 .ve
2375 
2376    Where the submatrices A,B,C are owned by proc0, D,E,F are
2377    owned by proc1, G,H,I are owned by proc2.
2378 
2379    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2380    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2381    The 'M','N' parameters are 8,8, and have the same values on all procs.
2382 
2383    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2384    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2385    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2386    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2387    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2388    matrix, and [DF] as another SeqAIJ matrix.
2389 
2390    When d_nz, o_nz parameters are specified, d_nz storage elements are
2391    allocated for every row of the local diagonal submatrix, and o_nz
2392    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2393    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2394    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2395    In this case, the values of d_nz,o_nz are:
2396 .vb
2397      proc0 : d_nz = 2, o_nz = 2
2398      proc1 : d_nz = 3, o_nz = 2
2399      proc2 : d_nz = 1, o_nz = 4
2400 .ve
2401    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2402    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2403    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2404    34 values.
2405 
2406    When d_nnz, o_nnz parameters are specified, the storage is specified
2407    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2408    In the above case the values for d_nnz,o_nnz are:
2409 .vb
2410      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2411      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2412      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2413 .ve
2414    Here the space allocated is the sum of all the above values, i.e. 34, and
2415    hence the preallocation is perfect.
2416 
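   A minimal usage sketch (it assumes an existing communicator comm and sizes m,n,M,N;
   the per-row counts 3 and 2 are illustrative, not recommendations):

.vb
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
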
2417    Level: intermediate
2418 
2419 .keywords: matrix, aij, compressed row, sparse, parallel
2420 
2421 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2422           MPIAIJ
2423 @*/
2424 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2425 {
2426   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2427 
2428   PetscFunctionBegin;
2429   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2430   if (f) {
2431     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2432   }
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 #undef __FUNCT__
2437 #define __FUNCT__ "MatCreateMPIAIJ"
2438 /*@C
2439    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2440    (the default parallel PETSc format).  For good matrix assembly performance
2441    the user should preallocate the matrix storage by setting the parameters
2442    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2443    performance can be increased by more than a factor of 50.
2444 
2445    Collective on MPI_Comm
2446 
2447    Input Parameters:
2448 +  comm - MPI communicator
2449 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2450            This value should be the same as the local size used in creating the
2451            y vector for the matrix-vector product y = Ax.
2452 .  n - This value should be the same as the local size used in creating the
2453        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2454        calculated if N is given). For square matrices n is almost always m.
2455 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2456 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2457 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2458            (same value is used for all local rows)
2459 .  d_nnz - array containing the number of nonzeros in the various rows of the
2460            DIAGONAL portion of the local submatrix (possibly different for each row)
2461            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2462            The size of this array is equal to the number of local rows, i.e. 'm'.
2463            You must leave room for the diagonal entry even if it is zero.
2464 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2465            submatrix (same value is used for all local rows).
2466 -  o_nnz - array containing the number of nonzeros in the various rows of the
2467            OFF-DIAGONAL portion of the local submatrix (possibly different for
2468            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2469            structure. The size of this array is equal to the number
2470            of local rows, i.e. 'm'.
2471 
2472    Output Parameter:
2473 .  A - the matrix
2474 
2475    Notes:
2476    If the *_nnz parameter is given then the *_nz parameter is ignored
2477 
2478    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2479    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2480    storage requirements for this matrix.
2481 
2482    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2483    processor then it must be used on all processors that share the object for
2484    that argument.
2485 
2486    The user MUST specify either the local or global matrix dimensions
2487    (possibly both).
2488 
2489    The parallel matrix is partitioned such that the first m0 rows belong to
2490    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2491    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
2492 
2493    The DIAGONAL portion of the local submatrix of a processor can be defined
2494    as the submatrix which is obtained by extracting the part corresponding
2495    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2496    first row that belongs to the processor, and r2 is the last row belonging
2497    to this processor. This is a square mxm matrix. The remaining portion
2498    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2499 
2500    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2501 
2502    When calling this routine with a single process communicator, a matrix of
2503    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2504    type of communicator, use the construction mechanism:
2505      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2506 
2507    By default, this format uses inodes (identical nodes) when possible.
2508    We search for consecutive rows with the same nonzero structure, thereby
2509    reusing matrix information to achieve increased efficiency.
2510 
2511    Options Database Keys:
2512 +  -mat_no_inode  - Do not use inodes
2513 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2514 -  -mat_aij_oneindex - Internally use indexing starting at 1
2515         rather than 0.  Note that when calling MatSetValues(),
2516         the user still MUST index entries starting at 0!
2517 
2518 
2519    Example usage:
2520 
2521    Consider the following 8x8 matrix with 34 non-zero values, that is
2522    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2523    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2524    as follows:
2525 
2526 .vb
2527             1  2  0  |  0  3  0  |  0  4
2528     Proc0   0  5  6  |  7  0  0  |  8  0
2529             9  0 10  | 11  0  0  | 12  0
2530     -------------------------------------
2531            13  0 14  | 15 16 17  |  0  0
2532     Proc1   0 18  0  | 19 20 21  |  0  0
2533             0  0  0  | 22 23  0  | 24  0
2534     -------------------------------------
2535     Proc2  25 26 27  |  0  0 28  | 29  0
2536            30  0  0  | 31 32 33  |  0 34
2537 .ve
2538 
2539    This can be represented as a collection of submatrices as:
2540 
2541 .vb
2542       A B C
2543       D E F
2544       G H I
2545 .ve
2546 
2547    Where the submatrices A,B,C are owned by proc0, D,E,F are
2548    owned by proc1, G,H,I are owned by proc2.
2549 
2550    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2551    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2552    The 'M','N' parameters are 8,8, and have the same values on all procs.
2553 
2554    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2555    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2556    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2557    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2558    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2559    matrix, and [DF] as another SeqAIJ matrix.
2560 
2561    When d_nz, o_nz parameters are specified, d_nz storage elements are
2562    allocated for every row of the local diagonal submatrix, and o_nz
2563    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2564    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2565    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2566    In this case, the values of d_nz,o_nz are:
2567 .vb
2568      proc0 : d_nz = 2, o_nz = 2
2569      proc1 : d_nz = 3, o_nz = 2
2570      proc2 : d_nz = 1, o_nz = 4
2571 .ve
2572    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2573    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2574    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2575    34 values.
2576 
2577    When d_nnz, o_nnz parameters are specified, the storage is specified
2578    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2579    In the above case the values for d_nnz,o_nnz are:
2580 .vb
2581      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2582      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2583      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2584 .ve
2585    Here the space allocated is the sum of all the above values, i.e. 34, and
2586    hence the preallocation is perfect.
2587 
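   A minimal usage sketch for the 8x8 example above (run on 3 processes; m is the
   per-process local row count 3, 3 or 2, and the d_nz/o_nz values are illustrative):

.vb
   ierr = MatCreateMPIAIJ(comm,m,m,8,8,3,PETSC_NULL,4,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
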
2588    Level: intermediate
2589 
2590 .keywords: matrix, aij, compressed row, sparse, parallel
2591 
2592 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2593           MPIAIJ
2594 @*/
2595 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2596 {
2597   PetscErrorCode ierr;
2598   PetscMPIInt    size;
2599 
2600   PetscFunctionBegin;
2601   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2602   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2603   if (size > 1) {
2604     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2605     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2606   } else {
2607     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2608     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2609   }
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 #undef __FUNCT__
2614 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2615 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2616 {
2617   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2618 
2619   PetscFunctionBegin;
2620   *Ad     = a->A;
2621   *Ao     = a->B;
2622   *colmap = a->garray;
2623   PetscFunctionReturn(0);
2624 }
2625 
2626 #undef __FUNCT__
2627 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2628 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2629 {
2630   PetscErrorCode ierr;
2631   PetscInt       i;
2632   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2633 
2634   PetscFunctionBegin;
2635   if (coloring->ctype == IS_COLORING_LOCAL) {
2636     ISColoringValue *allcolors,*colors;
2637     ISColoring      ocoloring;
2638 
2639     /* set coloring for diagonal portion */
2640     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2641 
2642     /* set coloring for off-diagonal portion */
2643     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2644     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2645     for (i=0; i<a->B->n; i++) {
2646       colors[i] = allcolors[a->garray[i]];
2647     }
2648     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2649     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2650     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2651     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2652   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2653     ISColoringValue *colors;
2654     PetscInt             *larray;
2655     ISColoring      ocoloring;
2656 
2657     /* set coloring for diagonal portion */
2658     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2659     for (i=0; i<a->A->n; i++) {
2660       larray[i] = i + a->cstart;
2661     }
2662     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2663     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2664     for (i=0; i<a->A->n; i++) {
2665       colors[i] = coloring->colors[larray[i]];
2666     }
2667     ierr = PetscFree(larray);CHKERRQ(ierr);
2668     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2669     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2670     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2671 
2672     /* set coloring for off-diagonal portion */
2673     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2674     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2675     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2676     for (i=0; i<a->B->n; i++) {
2677       colors[i] = coloring->colors[larray[i]];
2678     }
2679     ierr = PetscFree(larray);CHKERRQ(ierr);
2680     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2681     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2682     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2683   } else {
2684     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2685   }
2686 
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 #if defined(PETSC_HAVE_ADIC)
2691 #undef __FUNCT__
2692 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2693 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2694 {
2695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2696   PetscErrorCode ierr;
2697 
2698   PetscFunctionBegin;
2699   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2700   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2701   PetscFunctionReturn(0);
2702 }
2703 #endif
2704 
2705 #undef __FUNCT__
2706 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2707 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2708 {
2709   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2710   PetscErrorCode ierr;
2711 
2712   PetscFunctionBegin;
2713   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2714   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 #undef __FUNCT__
2719 #define __FUNCT__ "MatMerge"
2720 /*@C
2721       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2722                  matrices from each processor
2723 
2724     Collective on MPI_Comm
2725 
2726    Input Parameters:
2727 +    comm - the communicator the parallel matrix will live on
2728 .    inmat - the input sequential matrix (one per processor)
2729 .    n - number of local columns (or PETSC_DECIDE)
2730 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2731 
2732    Output Parameter:
2733 .    outmat - the parallel matrix generated
2734 
2735     Level: advanced
2736 
2737    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2738 
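   A minimal usage sketch (it assumes each process holds a sequential matrix Alocal with
   the same number of columns; note that, as implemented here, the input matrix is destroyed):

.vb
   ierr = MatMerge(comm,Alocal,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Aglobal);CHKERRQ(ierr);
.ve
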
2739 @*/
2740 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2741 {
2742   PetscErrorCode ierr;
2743   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2744   PetscInt       *indx;
2745   PetscScalar    *values;
2746   PetscMap       columnmap,rowmap;
2747 
2748   PetscFunctionBegin;
2749   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2750   /*
2751   PetscMPIInt       rank;
2752   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2753   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2754   */
2755   if (scall == MAT_INITIAL_MATRIX){
2756     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2757     if (n == PETSC_DECIDE){
2758       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2759       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2760       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2761       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2762       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2763     }
2764 
2765     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2766     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2767     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2768     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2769     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2770 
2771     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2772     for (i=0;i<m;i++) {
2773       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2774       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2775       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2776     }
2777     /* This routine will ONLY return MPIAIJ type matrix */
2778     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2779     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2780     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2781     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2782 
2783   } else if (scall == MAT_REUSE_MATRIX){
2784     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2785   } else {
2786     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2787   }
2788 
2789   for (i=0;i<m;i++) {
2790     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2791     I    = i + rstart;
2792     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2793     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2794   }
2795   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2796   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2797   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2798 
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 #undef __FUNCT__
2803 #define __FUNCT__ "MatFileSplit"
2804 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2805 {
2806   PetscErrorCode    ierr;
2807   PetscMPIInt       rank;
2808   PetscInt          m,N,i,rstart,nnz;
2809   size_t            len;
2810   const PetscInt    *indx;
2811   PetscViewer       out;
2812   char              *name;
2813   Mat               B;
2814   const PetscScalar *values;
2815 
2816   PetscFunctionBegin;
2817   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2818   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2819   /* Should this be the type of the diagonal block of A? */
2820   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2821   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2822   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2823   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2824   for (i=0;i<m;i++) {
2825     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2826     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2827     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2828   }
2829   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2830   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2831 
2832   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2833   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2834   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2835   sprintf(name,"%s.%d",outfile,rank);
2836   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2837   ierr = PetscFree(name);CHKERRQ(ierr);
2838   ierr = MatView(B,out);CHKERRQ(ierr);
2839   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2840   ierr = MatDestroy(B);CHKERRQ(ierr);
2841   PetscFunctionReturn(0);
2842 }
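/*
   Usage sketch (illustration only): the call

     ierr = MatFileSplit(A,"Amat");CHKERRQ(ierr);

   makes each process write its local rows of the parallel matrix A as a
   SeqAIJ matrix to its own binary file Amat.0, Amat.1, ... (one per rank).
*/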
2843 
2844 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2845 #undef __FUNCT__
2846 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2847 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2848 {
2849   PetscErrorCode       ierr;
2850   Mat_Merge_SeqsToMPI  *merge;
2851   PetscObjectContainer container;
2852 
2853   PetscFunctionBegin;
2854   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2855   if (container) {
2856     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2857     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2858     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2859     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2860     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2861     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2862     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2863     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2864     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2865     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2866     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2867     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2868 
2869     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2870     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2871     ierr = PetscFree(merge);CHKERRQ(ierr);
2872   }
2873 
2874   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2875   PetscFunctionReturn(0);
2876 }
2877 
2878 #include "src/mat/utils/freespace.h"
2879 #include "petscbt.h"
2880 #undef __FUNCT__
2881 #define __FUNCT__ "MatMerge_SeqsToMPI"
2882 /*@C
2883       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2884                  matrices from each processor
2885 
2886     Collective on MPI_Comm
2887 
2888    Input Parameters:
2889 +    comm - the communicator the parallel matrix will live on
2890 .    seqmat - the input sequential matrix on each processor
2891 .    m - number of local rows (or PETSC_DECIDE)
2892 .    n - number of local columns (or PETSC_DECIDE)
2893 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2894 
2895    Output Parameter:
2896 .    mpimat - the parallel matrix generated
2897 
2898     Level: advanced
2899 
2900    Notes:
2901      The dimensions of the sequential matrix in each processor MUST be the same.
2902      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2903      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2904 @*/
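/*
   A minimal usage sketch of the driver defined below (illustration only): the
   symbolic phase is done once with MAT_INITIAL_MATRIX, after which the numeric
   phase can be repeated with MAT_REUSE_MATRIX whenever the values (but not the
   nonzero pattern) of seqmat change.

     Mat seqmat,mpimat;
     ... assemble seqmat on every process ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     ... change the values of seqmat ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
*/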
2905 static PetscEvent logkey_seqstompinum = 0;
2906 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2907 {
2908   PetscErrorCode       ierr;
2909   MPI_Comm             comm=mpimat->comm;
2910   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2911   PetscMPIInt          size,rank,taga,*len_s;
2912   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2913   PetscInt             proc,m;
2914   PetscInt             **buf_ri,**buf_rj;
2915   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2916   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2917   MPI_Request          *s_waits,*r_waits;
2918   MPI_Status           *status;
2919   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2920   Mat_Merge_SeqsToMPI  *merge;
2921   PetscObjectContainer container;
2922 
2923   PetscFunctionBegin;
2924   if (!logkey_seqstompinum) {
2925     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2926   }
2927   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2928 
2929   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2930   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2931 
2932   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2933   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix was not created with MatMerge_SeqsToMPISymbolic()");
2934   ierr   = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2935 
2936   bi     = merge->bi;
2937   bj     = merge->bj;
2938   buf_ri = merge->buf_ri;
2939   buf_rj = merge->buf_rj;
2940 
2941   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2942   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2943   len_s  = merge->len_s;
2944 
2945   /* send and recv matrix values */
2946   /*-----------------------------*/
2947   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2948   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2949 
2950   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2951   for (proc=0,k=0; proc<size; proc++){
2952     if (!len_s[proc]) continue;
2953     i = owners[proc];
2954     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2955     k++;
2956   }
2957 
2958   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2959   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2960   ierr = PetscFree(status);CHKERRQ(ierr);
2961 
2962   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2963   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2964 
2965   /* insert mat values of mpimat */
2966   /*----------------------------*/
2967   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2968   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2969   nextrow = buf_ri_k + merge->nrecv;
2970   nextai  = nextrow + merge->nrecv;
2971 
2972   for (k=0; k<merge->nrecv; k++){
2973     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2974     nrows = *(buf_ri_k[k]);
2975     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2976     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
2977   }
2978 
2979   /* set values of ba */
2980   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2981   for (i=0; i<m; i++) {
2982     arow = owners[rank] + i;
2983     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2984     bnzi = bi[i+1] - bi[i];
2985     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2986 
2987     /* add local non-zero vals of this proc's seqmat into ba */
2988     anzi = ai[arow+1] - ai[arow];
2989     aj   = a->j + ai[arow];
2990     aa   = a->a + ai[arow];
2991     nextaj = 0;
2992     for (j=0; nextaj<anzi; j++){
2993       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2994         ba_i[j] += aa[nextaj++];
2995       }
2996     }
2997 
2998     /* add received vals into ba */
2999     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3000       /* i-th row */
3001       if (i == *nextrow[k]) {
3002         anzi = *(nextai[k]+1) - *nextai[k];
3003         aj   = buf_rj[k] + *(nextai[k]);
3004         aa   = abuf_r[k] + *(nextai[k]);
3005         nextaj = 0;
3006         for (j=0; nextaj<anzi; j++){
3007           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3008             ba_i[j] += aa[nextaj++];
3009           }
3010         }
3011         nextrow[k]++; nextai[k]++;
3012       }
3013     }
3014     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3015   }
3016   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3017   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3018 
3019   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3020   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3021   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3022   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3023   PetscFunctionReturn(0);
3024 }
3025 static PetscEvent logkey_seqstompisym = 0;
3026 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3027 {
3028   PetscErrorCode       ierr;
3029   Mat                  B_mpi;
3030   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3031   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3032   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3033   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3034   PetscInt             len,proc,*dnz,*onz;
3035   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3036   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3037   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3038   MPI_Status           *status;
3039   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3040   PetscBT              lnkbt;
3041   Mat_Merge_SeqsToMPI  *merge;
3042   PetscObjectContainer container;
3043 
3044   PetscFunctionBegin;
3045   if (!logkey_seqstompisym) {
3046     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3047   }
3048   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3049 
3050   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3051   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3052 
3053   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3054   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3055 
3056   /* determine row ownership */
3057   /*---------------------------------------------------------*/
3058   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3059   if (m == PETSC_DECIDE) {
3060     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3061   } else {
3062     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3063   }
3064   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3065   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3066   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3067 
3068   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3069   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3070 
3071   /* determine the number of messages to send, their lengths */
3072   /*---------------------------------------------------------*/
3073   len_s  = merge->len_s;
3074 
3075   len = 0;  /* length of buf_si[] */
3076   merge->nsend = 0;
3077   for (proc=0; proc<size; proc++){
3078     len_si[proc] = 0;
3079     if (proc == rank){
3080       len_s[proc] = 0;
3081     } else {
3082       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3083       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3084     }
3085     if (len_s[proc]) {
3086       merge->nsend++;
3087       nrows = 0;
3088       for (i=owners[proc]; i<owners[proc+1]; i++){
3089         if (ai[i+1] > ai[i]) nrows++;
3090       }
3091       len_si[proc] = 2*(nrows+1);
3092       len += len_si[proc];
3093     }
3094   }
3095 
3096   /* determine the number and length of messages to receive for ij-structure */
3097   /*-------------------------------------------------------------------------*/
3098   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3099   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3100 
3101   /* post the Irecv of j-structure */
3102   /*-------------------------------*/
3103   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3104   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3105 
3106   /* post the Isend of j-structure */
3107   /*--------------------------------*/
3108   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3109   sj_waits = si_waits + merge->nsend;
3110 
3111   for (proc=0, k=0; proc<size; proc++){
3112     if (!len_s[proc]) continue;
3113     i = owners[proc];
3114     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3115     k++;
3116   }
3117 
3118   /* receives and sends of j-structure are complete */
3119   /*------------------------------------------------*/
3120   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3121   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3122 
3123   /* send and recv i-structure */
3124   /*---------------------------*/
3125   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3126   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3127 
3128   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3129   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3130   for (proc=0,k=0; proc<size; proc++){
3131     if (!len_s[proc]) continue;
3132     /* form outgoing message for i-structure:
3133          buf_si[0]:                 nrows to be sent
3134                [1:nrows]:           row index (local to the receiving process)
3135                [nrows+1:2*nrows+1]: i-structure index
3136     */
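    /* Illustration only (hypothetical numbers): if the nonempty rows destined
       for [proc] have local indices 3 and 5 with 2 and 4 nonzeros respectively,
       then nrows = 2, len_si[proc] = 6 and the outgoing message is
           buf_si = {2, 3, 5, 0, 2, 6}
       i.e. nrows, the row indices, then the cumulative i-structure offsets. */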
3137     /*-------------------------------------------*/
3138     nrows = len_si[proc]/2 - 1;
3139     buf_si_i    = buf_si + nrows+1;
3140     buf_si[0]   = nrows;
3141     buf_si_i[0] = 0;
3142     nrows = 0;
3143     for (i=owners[proc]; i<owners[proc+1]; i++){
3144       anzi = ai[i+1] - ai[i];
3145       if (anzi) {
3146         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3147         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3148         nrows++;
3149       }
3150     }
3151     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3152     k++;
3153     buf_si += len_si[proc];
3154   }
3155 
3156   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3157   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3158 
3159   ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
3160   for (i=0; i<merge->nrecv; i++){
3161     ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
3162   }
3163 
3164   ierr = PetscFree(len_si);CHKERRQ(ierr);
3165   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3166   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3167   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3168   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3169   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3170   ierr = PetscFree(status);CHKERRQ(ierr);
3171 
3172   /* compute a local seq matrix in each processor */
3173   /*----------------------------------------------*/
3174   /* allocate bi array and free space for accumulating nonzero column info */
3175   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3176   bi[0] = 0;
3177 
3178   /* create and initialize a linked list */
3179   nlnk = N+1;
3180   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3181 
3182   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3183   len = 0;
3184   len  = ai[owners[rank+1]] - ai[owners[rank]];
3185   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3186   current_space = free_space;
3187 
3188   /* determine symbolic info for each local row */
3189   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3190   nextrow = buf_ri_k + merge->nrecv;
3191   nextai  = nextrow + merge->nrecv;
3192   for (k=0; k<merge->nrecv; k++){
3193     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3194     nrows = *buf_ri_k[k];
3195     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3196     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3197   }
3198 
3199   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3200   len = 0;
3201   for (i=0;i<m;i++) {
3202     bnzi   = 0;
3203     /* add local non-zero cols of this proc's seqmat into lnk */
3204     arow   = owners[rank] + i;
3205     anzi   = ai[arow+1] - ai[arow];
3206     aj     = a->j + ai[arow];
3207     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3208     bnzi += nlnk;
3209     /* add received col data into lnk */
3210     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3211       if (i == *nextrow[k]) { /* i-th row */
3212         anzi = *(nextai[k]+1) - *nextai[k];
3213         aj   = buf_rj[k] + *nextai[k];
3214         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3215         bnzi += nlnk;
3216         nextrow[k]++; nextai[k]++;
3217       }
3218     }
3219     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3220 
3221     /* if free space is not available, make more free space */
3222     if (current_space->local_remaining<bnzi) {
3223       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3224       nspacedouble++;
3225     }
3226     /* copy data into free space, then initialize lnk */
3227     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3228     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3229 
3230     current_space->array           += bnzi;
3231     current_space->local_used      += bnzi;
3232     current_space->local_remaining -= bnzi;
3233 
3234     bi[i+1] = bi[i] + bnzi;
3235   }
3236 
3237   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3238 
3239   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3240   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3241   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3242 
3243   /* create symbolic parallel matrix B_mpi */
3244   /*---------------------------------------*/
3245   if (n==PETSC_DECIDE) {
3246     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3247   } else {
3248     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3249   }
3250   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3251   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3252   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3253 
3254   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3255   B_mpi->assembled     = PETSC_FALSE;
3256   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3257   merge->bi            = bi;
3258   merge->bj            = bj;
3259   merge->buf_ri        = buf_ri;
3260   merge->buf_rj        = buf_rj;
3261   merge->coi           = PETSC_NULL;
3262   merge->coj           = PETSC_NULL;
3263   merge->owners_co     = PETSC_NULL;
3264 
3265   /* attach the supporting struct to B_mpi for reuse */
3266   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3267   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3268   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3269   *mpimat = B_mpi;
3270   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3271   PetscFunctionReturn(0);
3272 }
3273 
3274 static PetscEvent logkey_seqstompi = 0;
3275 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3276 {
3277   PetscErrorCode   ierr;
3278 
3279   PetscFunctionBegin;
3280   if (!logkey_seqstompi) {
3281     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3282   }
3283   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3284   if (scall == MAT_INITIAL_MATRIX){
3285     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3286   }
3287   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3288   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3289   PetscFunctionReturn(0);
3290 }
3291 static PetscEvent logkey_getlocalmat = 0;
3292 #undef __FUNCT__
3293 #define __FUNCT__ "MatGetLocalMat"
3294 /*@C
3295      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3296 
3297     Not Collective
3298 
3299    Input Parameters:
3300 +    A - the matrix
3301 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3302 
3303    Output Parameter:
3304 .    A_loc - the local sequential matrix generated
3305 
3306     Level: developer
3307 
3308 @*/
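/*
   A minimal usage sketch (illustration only): the diagonal and off-diagonal
   blocks of the local rows of an MPIAIJ matrix A are merged into one SeqAIJ
   matrix with global column numbering.

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... after the values of A change (same nonzero pattern) ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/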
3309 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3310 {
3311   PetscErrorCode  ierr;
3312   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3313   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3314   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3315   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3316   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3317   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3318 
3319   PetscFunctionBegin;
3320   if (!logkey_getlocalmat) {
3321     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3322   }
3323   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3324   if (scall == MAT_INITIAL_MATRIX){
3325     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3326     ci[0] = 0;
3327     for (i=0; i<am; i++){
3328       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3329     }
3330     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3331     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3332     k = 0;
3333     for (i=0; i<am; i++) {
3334       ncols_o = bi[i+1] - bi[i];
3335       ncols_d = ai[i+1] - ai[i];
3336       /* off-diagonal portion of A */
3337       for (jo=0; jo<ncols_o; jo++) {
3338         col = cmap[*bj];
3339         if (col >= cstart) break;
3340         cj[k]   = col; bj++;
3341         ca[k++] = *ba++;
3342       }
3343       /* diagonal portion of A */
3344       for (j=0; j<ncols_d; j++) {
3345         cj[k]   = cstart + *aj++;
3346         ca[k++] = *aa++;
3347       }
3348       /* off-diagonal portion of A */
3349       for (j=jo; j<ncols_o; j++) {
3350         cj[k]   = cmap[*bj++];
3351         ca[k++] = *ba++;
3352       }
3353     }
3354     /* put together the new matrix */
3355     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3356     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3357     /* Since these are PETSc arrays, change flags to free them as necessary. */
3358     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3359     mat->freedata = PETSC_TRUE;
3360     mat->nonew    = 0;
3361   } else if (scall == MAT_REUSE_MATRIX){
3362     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3363     ci = mat->i; cj = mat->j; ca = mat->a;
3364     for (i=0; i<am; i++) {
3365       /* off-diagonal portion of A */
3366       ncols_o = bi[i+1] - bi[i];
3367       for (jo=0; jo<ncols_o; jo++) {
3368         col = cmap[*bj];
3369         if (col >= cstart) break;
3370         *ca++ = *ba++; bj++;
3371       }
3372       /* diagonal portion of A */
3373       ncols_d = ai[i+1] - ai[i];
3374       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3375       /* off-diagonal portion of A */
3376       for (j=jo; j<ncols_o; j++) {
3377         *ca++ = *ba++; bj++;
3378       }
3379     }
3380   } else {
3381     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3382   }
3383 
3384   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3385   PetscFunctionReturn(0);
3386 }
3387 
3388 static PetscEvent logkey_getlocalmatcondensed = 0;
3389 #undef __FUNCT__
3390 #define __FUNCT__ "MatGetLocalMatCondensed"
3391 /*@C
3392      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
3393 
3394     Not Collective
3395 
3396    Input Parameters:
3397 +    A - the matrix
3398 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3399 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3400 
3401    Output Parameter:
3402 .    A_loc - the local sequential matrix generated
3403 
3404     Level: developer
3405 
3406 @*/
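/*
   Usage sketch (illustration only): passing PETSC_NULL for row and col lets the
   routine build the index sets itself, selecting all local rows and only the
   columns that hold nonzeros on this process.

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ... use A_loc, then MatDestroy(A_loc) ...
*/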
3407 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3408 {
3409   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3410   PetscErrorCode    ierr;
3411   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3412   IS                isrowa,iscola;
3413   Mat               *aloc;
3414 
3415   PetscFunctionBegin;
3416   if (!logkey_getlocalmatcondensed) {
3417     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3418   }
3419   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3420   if (!row){
3421     start = a->rstart; end = a->rend;
3422     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3423   } else {
3424     isrowa = *row;
3425   }
3426   if (!col){
3427     start = a->cstart;
3428     cmap  = a->garray;
3429     nzA   = a->A->n;
3430     nzB   = a->B->n;
3431     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3432     ncols = 0;
3433     for (i=0; i<nzB; i++) {
3434       if (cmap[i] < start) idx[ncols++] = cmap[i];
3435       else break;
3436     }
3437     imark = i;
3438     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3439     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3440     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3441     ierr = PetscFree(idx);CHKERRQ(ierr);
3442   } else {
3443     iscola = *col;
3444   }
3445   if (scall != MAT_INITIAL_MATRIX){
3446     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3447     aloc[0] = *A_loc;
3448   }
3449   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3450   *A_loc = aloc[0];
3451   ierr = PetscFree(aloc);CHKERRQ(ierr);
3452   if (!row){
3453     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3454   }
3455   if (!col){
3456     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3457   }
3458   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3459   PetscFunctionReturn(0);
3460 }
3461 
3462 static PetscEvent logkey_GetBrowsOfAcols = 0;
3463 #undef __FUNCT__
3464 #define __FUNCT__ "MatGetBrowsOfAcols"
3465 /*@C
3466     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3467 
3468     Collective on Mat
3469 
3470    Input Parameters:
3471 +    A,B - the matrices in mpiaij format
3472 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3473 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3474 
3475    Output Parameter:
3476 +    rowb, colb - index sets of rows and columns of B to extract
3477 .    brstart - row index of B_seq from which the next B->m rows are taken from B's local rows
3478 -    B_seq - the sequential matrix generated
3479 
3480     Level: developer
3481 
3482 @*/
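/*
   Usage sketch (illustration only): the index sets and brstart returned by the
   MAT_INITIAL_MATRIX call are kept so the extraction can be repeated cheaply
   with MAT_REUSE_MATRIX after the values of B change.

     Mat      B_seq;
     IS       rowb,colb;
     PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... values of B change ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/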
3483 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3484 {
3485   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3486   PetscErrorCode    ierr;
3487   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3488   IS                isrowb,iscolb;
3489   Mat               *bseq;
3490 
3491   PetscFunctionBegin;
3492   if (a->cstart != b->rstart || a->cend != b->rend){
3493     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3494   }
3495   if (!logkey_GetBrowsOfAcols) {
3496     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3497   }
3498   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3499 
3500   if (scall == MAT_INITIAL_MATRIX){
3501     start = a->cstart;
3502     cmap  = a->garray;
3503     nzA   = a->A->n;
3504     nzB   = a->B->n;
3505     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3506     ncols = 0;
3507     for (i=0; i<nzB; i++) {  /* row < local row index */
3508       if (cmap[i] < start) idx[ncols++] = cmap[i];
3509       else break;
3510     }
3511     imark = i;
3512     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3513     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3514     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3515     ierr = PetscFree(idx);CHKERRQ(ierr);
3516     *brstart = imark;
3517     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3518   } else {
3519     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3520     isrowb = *rowb; iscolb = *colb;
3521     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3522     bseq[0] = *B_seq;
3523   }
3524   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3525   *B_seq = bseq[0];
3526   ierr = PetscFree(bseq);CHKERRQ(ierr);
3527   if (!rowb){
3528     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3529   } else {
3530     *rowb = isrowb;
3531   }
3532   if (!colb){
3533     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3534   } else {
3535     *colb = iscolb;
3536   }
3537   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3538   PetscFunctionReturn(0);
3539 }
3540 
3541 static PetscEvent logkey_GetBrowsOfAocols = 0;
3542 #undef __FUNCT__
3543 #define __FUNCT__ "MatGetBrowsOfAoCols"
3544 /*@C
3545     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3546     of the OFF-DIAGONAL portion of local A
3547 
3548     Collective on Mat
3549 
3550    Input Parameters:
3551 +    A,B - the matrices in mpiaij format
3552 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3553 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3554 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3555 
3556    Output Parameter:
3557 .    B_oth - the sequential matrix generated
3558 
3559     Level: developer
3560 
3561 @*/
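/*
   Usage sketch (illustration only): startsj and bufa hold the communication
   layout and send buffer from the MAT_INITIAL_MATRIX call so that only the
   numerical values are exchanged on a MAT_REUSE_MATRIX call.

     Mat         B_oth;
     PetscInt    *startsj;
     PetscScalar *bufa;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ... values of B change ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
*/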
3562 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3563 {
3564   VecScatter_MPI_General *gen_to,*gen_from;
3565   PetscErrorCode         ierr;
3566   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3567   Mat_SeqAIJ             *b_oth;
3568   VecScatter             ctx=a->Mvctx;
3569   MPI_Comm               comm=ctx->comm;
3570   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3571   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3572   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3573   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3574   MPI_Request            *rwaits,*swaits;
3575   MPI_Status             *sstatus,rstatus;
3576   PetscInt               *cols;
3577   PetscScalar            *vals;
3578   PetscMPIInt            j;
3579 
3580   PetscFunctionBegin;
3581   if (a->cstart != b->rstart || a->cend != b->rend){
3582     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3583   }
3584   if (!logkey_GetBrowsOfAocols) {
3585     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3586   }
3587   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3588   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3589 
3590   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3591   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3592   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3593   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3594   nrecvs   = gen_from->n;
3595   nsends   = gen_to->n;
3596   rwaits   = gen_from->requests;
3597   swaits   = gen_to->requests;
3598   srow     = gen_to->indices;   /* local row index to be sent */
3599   rstarts  = gen_from->starts;
3600   sstarts  = gen_to->starts;
3601   rprocs   = gen_from->procs;
3602   sprocs   = gen_to->procs;
3603   sstatus  = gen_to->sstatus;
3604 
3605   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3606   if (scall == MAT_INITIAL_MATRIX){
3607     /* i-array */
3608     /*---------*/
3609     /*  post receives */
3610     for (i=0; i<nrecvs; i++){
3611       rowlen = (PetscInt*)rvalues + rstarts[i];
3612       nrows = rstarts[i+1]-rstarts[i];
3613       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3614     }
3615 
3616     /* pack the outgoing message */
3617     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3618     rstartsj = sstartsj + nsends +1;
3619     sstartsj[0] = 0;  rstartsj[0] = 0;
3620     len = 0; /* total length of j or a array to be sent */
3621     k = 0;
3622     for (i=0; i<nsends; i++){
3623       rowlen = (PetscInt*)svalues + sstarts[i];
3624       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3625       for (j=0; j<nrows; j++) {
3626         row = srow[k] + b->rowners[rank]; /* global row idx */
3627         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3628         len += rowlen[j];
3629         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3630         k++;
3631       }
3632       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3633       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3634     }
3635     /* recvs and sends of i-array are completed */
3636     i = nrecvs;
3637     while (i--) {
3638       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3639     }
3640     if (nsends) {
3641       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3642     }
3643     /* allocate buffers for sending j and a arrays */
3644     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3645     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3646 
3647     /* create i-array of B_oth */
3648     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3649     b_othi[0] = 0;
3650     len = 0; /* total length of j or a array to be received */
3651     k = 0;
3652     for (i=0; i<nrecvs; i++){
3653       rowlen = (PetscInt*)rvalues + rstarts[i];
3654       nrows = rstarts[i+1]-rstarts[i];
3655       for (j=0; j<nrows; j++) {
3656         b_othi[k+1] = b_othi[k] + rowlen[j];
3657         len += rowlen[j]; k++;
3658       }
3659       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3660     }
3661 
3662     /* allocate space for j and a arrays of B_oth */
3663     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3664     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3665 
3666     /* j-array */
3667     /*---------*/
3668     /*  post receives of j-array */
3669     for (i=0; i<nrecvs; i++){
3670       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3671       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3672     }
3673     k = 0;
3674     for (i=0; i<nsends; i++){
3675       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3676       bufJ = bufj+sstartsj[i];
3677       for (j=0; j<nrows; j++) {
3678         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3679         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3680         for (l=0; l<ncols; l++){
3681           *bufJ++ = cols[l];
3682         }
3683         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3684       }
3685       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3686     }
3687 
3688     /* recvs and sends of j-array are completed */
3689     i = nrecvs;
3690     while (i--) {
3691       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3692     }
3693     if (nsends) {
3694       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3695     }
3696   } else if (scall == MAT_REUSE_MATRIX){
3697     sstartsj = *startsj;
3698     rstartsj = sstartsj + nsends +1;
3699     bufa     = *bufa_ptr;
3700     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3701     b_otha   = b_oth->a;
3702   } else {
3703     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3704   }
3705 
3706   /* a-array */
3707   /*---------*/
3708   /*  post receives of a-array */
3709   for (i=0; i<nrecvs; i++){
3710     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3711     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3712   }
3713   k = 0;
3714   for (i=0; i<nsends; i++){
3715     nrows = sstarts[i+1]-sstarts[i];
3716     bufA = bufa+sstartsj[i];
3717     for (j=0; j<nrows; j++) {
3718       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3719       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3720       for (l=0; l<ncols; l++){
3721         *bufA++ = vals[l];
3722       }
3723       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3724 
3725     }
3726     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3727   }
3728   /* recvs and sends of a-array are completed */
3729   i = nrecvs;
3730   while (i--) {
3731     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3732   }
3733   if (nsends) {
3734     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3735   }
3736 
3737   if (scall == MAT_INITIAL_MATRIX){
3738     /* put together the new matrix */
3739     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3740 
3741     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3742     /* Since these are PETSc arrays, change flags to free them as necessary. */
3743     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3744     b_oth->freedata = PETSC_TRUE;
3745     b_oth->nonew    = 0;
3746 
3747     ierr = PetscFree(bufj);CHKERRQ(ierr);
3748     if (!startsj || !bufa_ptr){
3749       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3750       ierr = PetscFree(bufa);CHKERRQ(ierr);
3751     } else {
3752       *startsj  = sstartsj;
3753       *bufa_ptr = bufa;
3754     }
3755   }
3756   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3757 
3758   PetscFunctionReturn(0);
3759 }
3760 
3761 /*MC
3762    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3763 
3764    Options Database Keys:
3765 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3766 
3767   Level: beginner
3768 
3769 .seealso: MatCreateMPIAIJ
3770 M*/
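/*
   A small usage sketch (illustration only): the type is normally chosen from
   the options database, e.g. by running with -mat_type mpiaij.

     Mat A;
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/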
3771 
3772 EXTERN_C_BEGIN
3773 #undef __FUNCT__
3774 #define __FUNCT__ "MatCreate_MPIAIJ"
3775 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3776 {
3777   Mat_MPIAIJ     *b;
3778   PetscErrorCode ierr;
3779   PetscInt       i;
3780   PetscMPIInt    size;
3781 
3782   PetscFunctionBegin;
3783   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3784 
3785   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3786   B->data         = (void*)b;
3787   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3788   B->factor       = 0;
3789   B->bs           = 1;
3790   B->assembled    = PETSC_FALSE;
3791   B->mapping      = 0;
3792 
3793   B->insertmode      = NOT_SET_VALUES;
3794   b->size            = size;
3795   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3796 
3797   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3798   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3799 
3800   /* the information in the maps duplicates the information computed below, eventually
3801      we should remove the duplicate information that is not contained in the maps */
3802   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3803   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3804 
3805   /* build local table of row and column ownerships */
3806   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3807   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3808   b->cowners = b->rowners + b->size + 2;
3809   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3810   b->rowners[0] = 0;
3811   for (i=2; i<=b->size; i++) {
3812     b->rowners[i] += b->rowners[i-1];
3813   }
3814   b->rstart = b->rowners[b->rank];
3815   b->rend   = b->rowners[b->rank+1];
3816   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3817   b->cowners[0] = 0;
3818   for (i=2; i<=b->size; i++) {
3819     b->cowners[i] += b->cowners[i-1];
3820   }
3821   b->cstart = b->cowners[b->rank];
3822   b->cend   = b->cowners[b->rank+1];
3823 
3824   /* build cache for off array entries formed */
3825   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3826   b->donotstash  = PETSC_FALSE;
3827   b->colmap      = 0;
3828   b->garray      = 0;
3829   b->roworiented = PETSC_TRUE;
3830 
3831   /* stuff used for matrix vector multiply */
3832   b->lvec      = PETSC_NULL;
3833   b->Mvctx     = PETSC_NULL;
3834 
3835   /* stuff for MatGetRow() */
3836   b->rowindices   = 0;
3837   b->rowvalues    = 0;
3838   b->getrowactive = PETSC_FALSE;
3839 
3840   /* Explicitly create 2 MATSEQAIJ matrices. */
3841   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3842   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3843   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3844   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3845   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3846   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3847 
3848   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3849                                      "MatStoreValues_MPIAIJ",
3850                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3851   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3852                                      "MatRetrieveValues_MPIAIJ",
3853                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3854   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3855 				     "MatGetDiagonalBlock_MPIAIJ",
3856                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3857   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3858 				     "MatIsTranspose_MPIAIJ",
3859 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3860   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3861 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3862 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3863   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3864 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3865 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3866   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3867 				     "MatDiagonalScaleLocal_MPIAIJ",
3868 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3869   PetscFunctionReturn(0);
3870 }
3871 EXTERN_C_END
3872