xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 39d35d2dd2fd359ed0df7a061277ef1709a6de28)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9 a slightly higher hash table cost; without it, it is not scalable (each processor
10 has an order N integer array) but lookups are fast.
11 */
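/*
   Usage sketch (added for illustration only; the same two branches appear
   verbatim in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below).  Entries
   are stored shifted by one so that a zero array entry or a failed table
   lookup can be distinguished from local column 0.  Here gcol is a hypothetical
   global column number:

       PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;            (a result of -1 means gcol is not in the off-diagonal part)
   #else
       lcol = aij->colmap[gcol] - 1;
   #endif
*/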
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38     if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
39     lastcol1 = col;\
40     while (high1-low1 > 5) { \
41       t = (low1+high1)/2; \
42       if (rp1[t] > col) high1 = t; \
43       else             low1  = t; \
44     } \
45       for (_i=low1; _i<high1; _i++) { \
46         if (rp1[_i] > col) break; \
47         if (rp1[_i] == col) { \
48           if (addv == ADD_VALUES) ap1[_i] += value;   \
49           else                    ap1[_i] = value; \
50           goto a_noinsert; \
51         } \
52       }  \
53       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
54       if (nonew == 1) goto a_noinsert; \
55       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
56       if (nrow1 >= rmax1) { \
57         /* there is no extra room in row, therefore enlarge */ \
58         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
59         PetscScalar *new_a; \
60  \
61         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
62  \
63         /* malloc new storage space */ \
64         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
65         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
66         new_j   = (PetscInt*)(new_a + new_nz); \
67         new_i   = new_j + new_nz; \
68  \
69         /* copy over old data into new slots */ \
70         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
71         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
72         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow1)*sizeof(PetscInt));CHKERRQ(ierr); \
73         len = (new_nz - CHUNKSIZE - ai[row] - nrow1); \
74         ierr = PetscMemcpy(new_j+ai[row]+nrow1+CHUNKSIZE,aj+ai[row]+nrow1,len*sizeof(PetscInt));CHKERRQ(ierr); \
75         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow1)*sizeof(PetscScalar));CHKERRQ(ierr); \
76         ierr = PetscMemcpy(new_a+ai[row]+nrow1+CHUNKSIZE,aa+ai[row]+nrow1,len*sizeof(PetscScalar));CHKERRQ(ierr);  \
77         /* free up old matrix storage */ \
78  \
79         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
80         if (!a->singlemalloc) { \
81            ierr = PetscFree(a->i);CHKERRQ(ierr); \
82            ierr = PetscFree(a->j);CHKERRQ(ierr); \
83         } \
84         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
85         a->singlemalloc = PETSC_TRUE; \
86  \
87         rp1   = aj + ai[row]; ap1 = aa + ai[row]; \
88         rmax1 = aimax[row] = aimax[row] + CHUNKSIZE; \
89         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
90         a->maxnz += CHUNKSIZE; \
91         a->reallocs++; \
92       } \
93       N = nrow1++ - 1; a->nz++; \
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp1[ii+1] = rp1[ii]; \
97         ap1[ii+1] = ap1[ii]; \
98       } \
99       rp1[_i] = col;  \
100       ap1[_i] = value;  \
101       a_noinsert: ; \
102       ailen[row] = nrow1; \
103 }
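/*
   Note on the reallocation above (summary added for clarity): the three CSR
   arrays are carved out of a single PetscMalloc() so that one PetscFree()
   suffices later (hence a->singlemalloc = PETSC_TRUE).  The new allocation is
   laid out as

       [ new_nz PetscScalar values | new_nz PetscInt column indices | am+1 PetscInt row offsets ]

   and every row pointer after the enlarged row is shifted up by CHUNKSIZE.
   MatSetValues_SeqAIJ_B_Private() below applies the identical scheme to the
   off-diagonal block.
*/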
104 
105 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
106 { \
107     if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
108     lastcol2 = col;\
109     while (high2-low2 > 5) { \
110       t = (low2+high2)/2; \
111       if (rp2[t] > col) high2 = t; \
112       else             low2  = t; \
113     } \
114        for (_i=low2; _i<high2; _i++) { \
115         if (rp2[_i] > col) break; \
116         if (rp2[_i] == col) { \
117           if (addv == ADD_VALUES) ap2[_i] += value;   \
118           else                    ap2[_i] = value; \
119           goto b_noinsert; \
120         } \
121       }  \
122       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
123       if (nonew == 1) goto b_noinsert; \
124       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
125       if (nrow2 >= rmax2) { \
126         /* there is no extra room in row, therefore enlarge */ \
127         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
128         PetscScalar *new_a; \
129  \
130         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
131  \
132         /* malloc new storage space */ \
133         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
134         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
135         new_j   = (PetscInt*)(new_a + new_nz); \
136         new_i   = new_j + new_nz; \
137  \
138         /* copy over old data into new slots */ \
139         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
140         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
141         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow2)*sizeof(PetscInt));CHKERRQ(ierr); \
142         len = (new_nz - CHUNKSIZE - bi[row] - nrow2); \
143         ierr = PetscMemcpy(new_j+bi[row]+nrow2+CHUNKSIZE,bj+bi[row]+nrow2,len*sizeof(PetscInt));CHKERRQ(ierr); \
144         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow2)*sizeof(PetscScalar));CHKERRQ(ierr); \
145         ierr = PetscMemcpy(new_a+bi[row]+nrow2+CHUNKSIZE,ba+bi[row]+nrow2,len*sizeof(PetscScalar));CHKERRQ(ierr);  \
146         /* free up old matrix storage */ \
147  \
148         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
149         if (!b->singlemalloc) { \
150           ierr = PetscFree(b->i);CHKERRQ(ierr); \
151           ierr = PetscFree(b->j);CHKERRQ(ierr); \
152         } \
153         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
154         b->singlemalloc = PETSC_TRUE; \
155  \
156         rp2   = bj + bi[row]; ap2 = ba + bi[row]; \
157         rmax2 = bimax[row] = bimax[row] + CHUNKSIZE; \
158         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
159         b->maxnz += CHUNKSIZE; \
160         b->reallocs++; \
161       } \
162       N = nrow2++ - 1; b->nz++; \
163       /* shift up all the later entries in this row */ \
164       for (ii=N; ii>=_i; ii--) { \
165         rp2[ii+1] = rp2[ii]; \
166         ap2[ii+1] = ap2[ii]; \
167       } \
168       rp2[_i] = col;  \
169       ap2[_i] = value;  \
170       b_noinsert: ; \
171       bilen[row] = nrow2; \
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatSetValues_MPIAIJ"
176 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
177 {
178   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
179   PetscScalar    value;
180   PetscErrorCode ierr;
181   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
182   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
183   PetscTruth     roworiented = aij->roworiented;
184 
185   /* Some Variables required in the macro */
186   Mat            A = aij->A;
187   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
188   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
189   PetscScalar    *aa = a->a;
190   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
191   Mat            B = aij->B;
192   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
193   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
194   PetscScalar    *ba = b->a;
195 
196   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
197   PetscInt       nonew = a->nonew;
198   PetscScalar    *ap1,*ap2;
199 
200   PetscFunctionBegin;
201   for (i=0; i<m; i++) {
202     if (im[i] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
205 #endif
206     if (im[i] >= rstart && im[i] < rend) {
207       row      = im[i] - rstart;
208       lastcol1 = -1;
209       rp1      = aj + ai[row];
210       ap1      = aa + ai[row];
211       rmax1    = aimax[row];
212       nrow1    = ailen[row];
213       low1     = 0;
214       high1    = nrow1;
215       lastcol2 = -1;
216       rp2      = bj + bi[row];
217       ap2      = ba + bi[row];
218       rmax2    = bimax[row];
219       nrow2    = bilen[row];
220       low2     = 0;
221       high2    = nrow2;
222 
223       for (j=0; j<n; j++) {
224         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
225         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
229         } else if (in[j] < 0) continue;
230 #if defined(PETSC_USE_DEBUG)
231         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!aij->colmap) {
236               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined (PETSC_USE_CTABLE)
239             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
240 	    col--;
241 #else
242             col = aij->colmap[in[j]] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
245               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
246               col =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
248               B = aij->B;
249               b = (Mat_SeqAIJ*)B->data;
250               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
251               ba = b->a;
252             }
253           } else col = in[j];
254           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
255         }
256       }
257     } else {
258       if (!aij->donotstash) {
259         if (roworiented) {
260           if (ignorezeroentries && v[i*n] == 0.0) continue;
261           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
262         } else {
263           if (ignorezeroentries && v[i] == 0.0) continue;
264           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
265         }
266       }
267     }
268   }
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatGetValues_MPIAIJ"
274 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
275 {
276   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
277   PetscErrorCode ierr;
278   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
279   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
280 
281   PetscFunctionBegin;
282   for (i=0; i<m; i++) {
283     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
284     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
285     if (idxm[i] >= rstart && idxm[i] < rend) {
286       row = idxm[i] - rstart;
287       for (j=0; j<n; j++) {
288         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
289         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
290         if (idxn[j] >= cstart && idxn[j] < cend){
291           col = idxn[j] - cstart;
292           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
293         } else {
294           if (!aij->colmap) {
295             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
296           }
297 #if defined (PETSC_USE_CTABLE)
298           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
299           col --;
300 #else
301           col = aij->colmap[idxn[j]] - 1;
302 #endif
303           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
304           else {
305             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
306           }
307         }
308       }
309     } else {
310       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
311     }
312   }
313   PetscFunctionReturn(0);
314 }
315 
316 #undef __FUNCT__
317 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
318 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
319 {
320   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
321   PetscErrorCode ierr;
322   PetscInt       nstash,reallocs;
323   InsertMode     addv;
324 
325   PetscFunctionBegin;
326   if (aij->donotstash) {
327     PetscFunctionReturn(0);
328   }
329 
330   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
331   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
332   if (addv == (ADD_VALUES|INSERT_VALUES)) {
333     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
334   }
335   mat->insertmode = addv; /* in case this processor had no cache */
336 
337   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
338   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
339   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
340   PetscFunctionReturn(0);
341 }
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
348   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
349   PetscErrorCode ierr;
350   PetscMPIInt    n;
351   PetscInt       i,j,rstart,ncols,flg;
352   PetscInt       *row,*col,other_disassembled;
353   PetscScalar    *val;
354   InsertMode     addv = mat->insertmode;
355 
356   PetscFunctionBegin;
357   if (!aij->donotstash) {
358     while (1) {
359       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
360       if (!flg) break;
361 
362       for (i=0; i<n;) {
363         /* Now identify the consecutive vals belonging to the same row */
364         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
365         if (j < n) ncols = j-i;
366         else       ncols = n-i;
367         /* Now assemble all these values with a single function call */
368         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
369         i = j;
370       }
371     }
372     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
373   }
374   a->compressedrow.use     = PETSC_FALSE;
375   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
376   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
377 
378   /* determine if any processor has disassembled, if so we must
379      also disassemble ourselves, in order that we may reassemble. */
380   /*
381      if nonzero structure of submatrix B cannot change then we know that
382      no processor disassembled thus we can skip this stuff
383   */
384   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
385     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
386     if (mat->was_assembled && !other_disassembled) {
387       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
388       /* reaccess the b because aij->B was changed */
389       b    = (Mat_SeqAIJ *)aij->B->data;
390     }
391   }
392 
393   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
394     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
395   }
396   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
397   b->compressedrow.use = PETSC_TRUE;
398   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
399   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
400 
401   if (aij->rowvalues) {
402     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
403     aij->rowvalues = 0;
404   }
405 
406   /* used by MatAXPY() */
407   a->xtoy = 0; b->xtoy = 0;
408   a->XtoY = 0; b->XtoY = 0;
409 
410   PetscFunctionReturn(0);
411 }
412 
413 #undef __FUNCT__
414 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
415 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
416 {
417   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
418   PetscErrorCode ierr;
419 
420   PetscFunctionBegin;
421   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
422   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
423   PetscFunctionReturn(0);
424 }
425 
426 #undef __FUNCT__
427 #define __FUNCT__ "MatZeroRows_MPIAIJ"
428 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
429 {
430   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
431   PetscErrorCode ierr;
432   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
433   PetscInt       i,N,*rows,*owners = l->rowners;
434   PetscInt       *nprocs,j,idx,nsends,row;
435   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
436   PetscInt       *rvalues,count,base,slen,*source;
437   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
438   MPI_Comm       comm = A->comm;
439   MPI_Request    *send_waits,*recv_waits;
440   MPI_Status     recv_status,*send_status;
441   IS             istmp;
442 #if defined(PETSC_DEBUG)
443   PetscTruth     found = PETSC_FALSE;
444 #endif
445 
446   PetscFunctionBegin;
447   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
448   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
449 
450   /*  first count number of contributors to each processor */
451   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
452   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
453   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
454   j = 0;
455   for (i=0; i<N; i++) {
456     if (lastidx > (idx = rows[i])) j = 0;
457     lastidx = idx;
458     for (; j<size; j++) {
459       if (idx >= owners[j] && idx < owners[j+1]) {
460         nprocs[2*j]++;
461         nprocs[2*j+1] = 1;
462         owner[i] = j;
463 #if defined(PETSC_DEBUG)
464         found = PETSC_TRUE;
465 #endif
466         break;
467       }
468     }
469 #if defined(PETSC_DEBUG)
470     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
471     found = PETSC_FALSE;
472 #endif
473   }
474   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
475 
476   /* inform other processors of number of messages and max length*/
477   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
478 
479   /* post receives:   */
480   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
481   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
482   for (i=0; i<nrecvs; i++) {
483     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
484   }
485 
486   /* do sends:
487       1) starts[i] gives the starting index in svalues for stuff going to
488          the ith processor
489   */
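  /*
     Worked example (hypothetical numbers, for illustration): with 3 processes
     and per-destination counts nprocs[2*i] = {2,0,3}, the prefix sums give
     starts = {0,2,2}; each row index is then packed at svalues[starts[owner[i]]++],
     so svalues ends up grouped by destination.  Because starts[] is advanced
     during packing, it is rebuilt before the MPI_Isend() loop below.
  */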
490   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
491   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
492   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
493   starts[0] = 0;
494   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
495   for (i=0; i<N; i++) {
496     svalues[starts[owner[i]]++] = rows[i];
497   }
498   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
499 
500   starts[0] = 0;
501   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
502   count = 0;
503   for (i=0; i<size; i++) {
504     if (nprocs[2*i+1]) {
505       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
506     }
507   }
508   ierr = PetscFree(starts);CHKERRQ(ierr);
509 
510   base = owners[rank];
511 
512   /*  wait on receives */
513   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
514   source = lens + nrecvs;
515   count  = nrecvs; slen = 0;
516   while (count) {
517     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
518     /* unpack receives into our local space */
519     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
520     source[imdex]  = recv_status.MPI_SOURCE;
521     lens[imdex]    = n;
522     slen          += n;
523     count--;
524   }
525   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
526 
527   /* move the data into the send scatter */
528   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
529   count = 0;
530   for (i=0; i<nrecvs; i++) {
531     values = rvalues + i*nmax;
532     for (j=0; j<lens[i]; j++) {
533       lrows[count++] = values[j] - base;
534     }
535   }
536   ierr = PetscFree(rvalues);CHKERRQ(ierr);
537   ierr = PetscFree(lens);CHKERRQ(ierr);
538   ierr = PetscFree(owner);CHKERRQ(ierr);
539   ierr = PetscFree(nprocs);CHKERRQ(ierr);
540 
541   /* actually zap the local rows */
542   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
543   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
544 
545   /*
546         Zero the required rows. If the "diagonal block" of the matrix
547      is square and the user wishes to set the diagonal we use separate
548      code so that MatSetValues() is not called for each diagonal entry, allocating
549      new memory, thus triggering lots of mallocs and slowing things down.
550 
551        Contributed by: Mathew Knepley
552   */
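  /*
     The cases handled below (summary added for clarity):
       1) diag given and the diagonal block is square: MatZeroRows(l->A,istmp,diag)
          places the diagonal entries directly;
       2) diag given but the diagonal block is rectangular: zero the rows first,
          then insert each diagonal entry with one MatSetValues() call and reassemble;
       3) no diag: simply zero the rows of both blocks.
  */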
553   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
554   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
555   if (diag && (l->A->M == l->A->N)) {
556     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
557   } else if (diag) {
558     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
559     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
560       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
561 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
562     }
563     for (i = 0; i < slen; i++) {
564       row  = lrows[i] + rstart;
565       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
566     }
567     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
568     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
569   } else {
570     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
571   }
572   ierr = ISDestroy(istmp);CHKERRQ(ierr);
573   ierr = PetscFree(lrows);CHKERRQ(ierr);
574 
575   /* wait on sends */
576   if (nsends) {
577     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
578     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
579     ierr = PetscFree(send_status);CHKERRQ(ierr);
580   }
581   ierr = PetscFree(send_waits);CHKERRQ(ierr);
582   ierr = PetscFree(svalues);CHKERRQ(ierr);
583 
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNCT__
588 #define __FUNCT__ "MatMult_MPIAIJ"
589 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
590 {
591   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
592   PetscErrorCode ierr;
593   PetscInt       nt;
594 
595   PetscFunctionBegin;
596   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
597   if (nt != A->n) {
598     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
599   }
600   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
601   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
602   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
603   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
604   PetscFunctionReturn(0);
605 }
606 
607 #undef __FUNCT__
608 #define __FUNCT__ "MatMultAdd_MPIAIJ"
609 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
610 {
611   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
612   PetscErrorCode ierr;
613 
614   PetscFunctionBegin;
615   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
616   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
617   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
618   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
619   PetscFunctionReturn(0);
620 }
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
624 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
625 {
626   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
627   PetscErrorCode ierr;
628   PetscTruth     merged;
629 
630   PetscFunctionBegin;
631   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
632   /* do nondiagonal part */
633   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
634   if (!merged) {
635     /* send it on its way */
636     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
637     /* do local part */
638     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
639     /* receive remote parts: note this assumes the values are not actually */
640     /* added into yy until the VecScatterEnd() below */
641     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
642   } else {
643     /* do local part */
644     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
645     /* send it on its way */
646     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
647     /* values actually were received in the Begin() but we need to call this nop */
648     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
649   }
650   PetscFunctionReturn(0);
651 }
652 
653 EXTERN_C_BEGIN
654 #undef __FUNCT__
655 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
656 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
657 {
658   MPI_Comm       comm;
659   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
660   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
661   IS             Me,Notme;
662   PetscErrorCode ierr;
663   PetscInt       M,N,first,last,*notme,i;
664   PetscMPIInt    size;
665 
666   PetscFunctionBegin;
667 
668   /* Easy test: symmetric diagonal block */
669   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
670   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
671   if (!*f) PetscFunctionReturn(0);
672   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
673   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
674   if (size == 1) PetscFunctionReturn(0);
675 
676   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
677   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
678   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
679   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
680   for (i=0; i<first; i++) notme[i] = i;
681   for (i=last; i<M; i++) notme[i-last+first] = i;
682   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
683   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
684   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
685   Aoff = Aoffs[0];
686   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
687   Boff = Boffs[0];
688   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
689   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
690   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
691   ierr = ISDestroy(Me);CHKERRQ(ierr);
692   ierr = ISDestroy(Notme);CHKERRQ(ierr);
693 
694   PetscFunctionReturn(0);
695 }
696 EXTERN_C_END
697 
698 #undef __FUNCT__
699 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
700 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
701 {
702   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
703   PetscErrorCode ierr;
704 
705   PetscFunctionBegin;
706   /* do nondiagonal part */
707   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
708   /* send it on its way */
709   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
710   /* do local part */
711   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
712   /* receive remote parts */
713   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
714   PetscFunctionReturn(0);
715 }
716 
717 /*
718   This only works correctly for square matrices where the subblock A->A is the
719    diagonal block
720 */
721 #undef __FUNCT__
722 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
723 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
724 {
725   PetscErrorCode ierr;
726   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
727 
728   PetscFunctionBegin;
729   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
730   if (a->rstart != a->cstart || a->rend != a->cend) {
731     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
732   }
733   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
734   PetscFunctionReturn(0);
735 }
736 
737 #undef __FUNCT__
738 #define __FUNCT__ "MatScale_MPIAIJ"
739 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
740 {
741   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
742   PetscErrorCode ierr;
743 
744   PetscFunctionBegin;
745   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
746   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "MatDestroy_MPIAIJ"
752 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
753 {
754   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
755   PetscErrorCode ierr;
756 
757   PetscFunctionBegin;
758 #if defined(PETSC_USE_LOG)
759   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
760 #endif
761   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
762   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
763   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
764   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
765 #if defined (PETSC_USE_CTABLE)
766   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
767 #else
768   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
769 #endif
770   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
771   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
772   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
773   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
774   ierr = PetscFree(aij);CHKERRQ(ierr);
775 
776   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
777   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
778   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
779   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
780   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
781   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
782   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
783   PetscFunctionReturn(0);
784 }
785 
786 #undef __FUNCT__
787 #define __FUNCT__ "MatView_MPIAIJ_Binary"
788 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
789 {
790   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
791   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
792   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
793   PetscErrorCode    ierr;
794   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
795   int               fd;
796   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
797   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
798   PetscScalar       *column_values;
799 
800   PetscFunctionBegin;
801   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
802   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
803   nz   = A->nz + B->nz;
804   if (!rank) {
805     header[0] = MAT_FILE_COOKIE;
806     header[1] = mat->M;
807     header[2] = mat->N;
808     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
809     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
810     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
811     /* get largest number of rows any processor has */
812     rlen = mat->m;
813     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
814     for (i=1; i<size; i++) {
815       rlen = PetscMax(rlen,range[i+1] - range[i]);
816     }
817   } else {
818     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
819     rlen = mat->m;
820   }
821 
822   /* load up the local row counts */
823   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
824   for (i=0; i<mat->m; i++) {
825     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
826   }
827 
828   /* store the row lengths to the file */
829   if (!rank) {
830     MPI_Status status;
831     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
832     for (i=1; i<size; i++) {
833       rlen = range[i+1] - range[i];
834       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
835       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
836     }
837   } else {
838     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
839   }
840   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
841 
842   /* load up the local column indices */
843   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
844   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
845   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
846   cnt  = 0;
847   for (i=0; i<mat->m; i++) {
848     for (j=B->i[i]; j<B->i[i+1]; j++) {
849       if ( (col = garray[B->j[j]]) > cstart) break;
850       column_indices[cnt++] = col;
851     }
852     for (k=A->i[i]; k<A->i[i+1]; k++) {
853       column_indices[cnt++] = A->j[k] + cstart;
854     }
855     for (; j<B->i[i+1]; j++) {
856       column_indices[cnt++] = garray[B->j[j]];
857     }
858   }
859   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
860 
861   /* store the column indices to the file */
862   if (!rank) {
863     MPI_Status status;
864     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
865     for (i=1; i<size; i++) {
866       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
867       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
868       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
869       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
870     }
871   } else {
872     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
873     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
874   }
875   ierr = PetscFree(column_indices);CHKERRQ(ierr);
876 
877   /* load up the local column values */
878   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
879   cnt  = 0;
880   for (i=0; i<mat->m; i++) {
881     for (j=B->i[i]; j<B->i[i+1]; j++) {
882       if ( garray[B->j[j]] > cstart) break;
883       column_values[cnt++] = B->a[j];
884     }
885     for (k=A->i[i]; k<A->i[i+1]; k++) {
886       column_values[cnt++] = A->a[k];
887     }
888     for (; j<B->i[i+1]; j++) {
889       column_values[cnt++] = B->a[j];
890     }
891   }
892   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
893 
894   /* store the column values to the file */
895   if (!rank) {
896     MPI_Status status;
897     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
898     for (i=1; i<size; i++) {
899       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
900       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
901       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
902       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
903     }
904   } else {
905     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
906     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
907   }
908   ierr = PetscFree(column_values);CHKERRQ(ierr);
909   PetscFunctionReturn(0);
910 }
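/*
   Layout of the binary file written above (summary added for reference): a
   4-entry header {MAT_FILE_COOKIE, M, N, global nz}, followed by all M row
   lengths, then the column indices of every row in global numbering, then the
   matching numerical values in the same order; ranks > 0 send their pieces to
   rank 0, which performs all of the writing.
*/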
911 
912 #undef __FUNCT__
913 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
914 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
915 {
916   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
917   PetscErrorCode    ierr;
918   PetscMPIInt       rank = aij->rank,size = aij->size;
919   PetscTruth        isdraw,iascii,isbinary;
920   PetscViewer       sviewer;
921   PetscViewerFormat format;
922 
923   PetscFunctionBegin;
924   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
925   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
926   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
927   if (iascii) {
928     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
929     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
930       MatInfo    info;
931       PetscTruth inodes;
932 
933       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
934       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
935       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
936       if (!inodes) {
937         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
938 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
939       } else {
940         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
941 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
942       }
943       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
944       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
945       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
946       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
947       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
948       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
949       PetscFunctionReturn(0);
950     } else if (format == PETSC_VIEWER_ASCII_INFO) {
951       PetscInt   inodecount,inodelimit,*inodes;
952       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
953       if (inodes) {
954         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
955       } else {
956         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
957       }
958       PetscFunctionReturn(0);
959     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
960       PetscFunctionReturn(0);
961     }
962   } else if (isbinary) {
963     if (size == 1) {
964       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
965       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
966     } else {
967       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
968     }
969     PetscFunctionReturn(0);
970   } else if (isdraw) {
971     PetscDraw  draw;
972     PetscTruth isnull;
973     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
974     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
975   }
976 
977   if (size == 1) {
978     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
979     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
980   } else {
981     /* assemble the entire matrix onto first processor. */
982     Mat         A;
983     Mat_SeqAIJ  *Aloc;
984     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
985     PetscScalar *a;
986 
987     if (!rank) {
988       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
989     } else {
990       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
991     }
992     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
993     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
994     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
995     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
996 
997     /* copy over the A part */
998     Aloc = (Mat_SeqAIJ*)aij->A->data;
999     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1000     row = aij->rstart;
1001     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
1002     for (i=0; i<m; i++) {
1003       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1004       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1005     }
1006     aj = Aloc->j;
1007     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
1008 
1009     /* copy over the B part */
1010     Aloc = (Mat_SeqAIJ*)aij->B->data;
1011     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1012     row  = aij->rstart;
1013     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1014     ct   = cols;
1015     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1016     for (i=0; i<m; i++) {
1017       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1018       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1019     }
1020     ierr = PetscFree(ct);CHKERRQ(ierr);
1021     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1022     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1023     /*
1024        Everyone has to call to draw the matrix since the graphics waits are
1025        synchronized across all processors that share the PetscDraw object
1026     */
1027     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1028     if (!rank) {
1029       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1030       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1031     }
1032     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1033     ierr = MatDestroy(A);CHKERRQ(ierr);
1034   }
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 #undef __FUNCT__
1039 #define __FUNCT__ "MatView_MPIAIJ"
1040 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1041 {
1042   PetscErrorCode ierr;
1043   PetscTruth     iascii,isdraw,issocket,isbinary;
1044 
1045   PetscFunctionBegin;
1046   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1047   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1048   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1049   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1050   if (iascii || isdraw || isbinary || issocket) {
1051     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1052   } else {
1053     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1054   }
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatRelax_MPIAIJ"
1062 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1063 {
1064   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1065   PetscErrorCode ierr;
1066   Vec            bb1;
1067   PetscScalar    mone=-1.0;
1068 
1069   PetscFunctionBegin;
1070   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1071 
1072   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1073 
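  /*
     Scheme shared by the three local-sweep branches below (summary added for
     clarity): gather the off-process part of xx into mat->lvec, form
     bb1 = bb - B*lvec by scaling lvec with -1 and calling the off-diagonal
     block's multadd, then apply the requested sequential relaxation to the
     diagonal block with bb1 as the right-hand side.
  */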
1074   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1075     if (flag & SOR_ZERO_INITIAL_GUESS) {
1076       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1077       its--;
1078     }
1079 
1080     while (its--) {
1081       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1082       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1083 
1084       /* update rhs: bb1 = bb - B*x */
1085       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1086       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1087 
1088       /* local sweep */
1089       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1090       CHKERRQ(ierr);
1091     }
1092   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1093     if (flag & SOR_ZERO_INITIAL_GUESS) {
1094       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1095       its--;
1096     }
1097     while (its--) {
1098       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1099       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1100 
1101       /* update rhs: bb1 = bb - B*x */
1102       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1103       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1104 
1105       /* local sweep */
1106       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1107       CHKERRQ(ierr);
1108     }
1109   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1110     if (flag & SOR_ZERO_INITIAL_GUESS) {
1111       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1112       its--;
1113     }
1114     while (its--) {
1115       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1116       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1117 
1118       /* update rhs: bb1 = bb - B*x */
1119       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1120       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1121 
1122       /* local sweep */
1123       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1124       CHKERRQ(ierr);
1125     }
1126   } else {
1127     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1128   }
1129 
1130   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1136 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1137 {
1138   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1139   Mat            A = mat->A,B = mat->B;
1140   PetscErrorCode ierr;
1141   PetscReal      isend[5],irecv[5];
1142 
1143   PetscFunctionBegin;
1144   info->block_size     = 1.0;
1145   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1146   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1147   isend[3] = info->memory;  isend[4] = info->mallocs;
1148   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1149   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1150   isend[3] += info->memory;  isend[4] += info->mallocs;
1151   if (flag == MAT_LOCAL) {
1152     info->nz_used      = isend[0];
1153     info->nz_allocated = isend[1];
1154     info->nz_unneeded  = isend[2];
1155     info->memory       = isend[3];
1156     info->mallocs      = isend[4];
1157   } else if (flag == MAT_GLOBAL_MAX) {
1158     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1159     info->nz_used      = irecv[0];
1160     info->nz_allocated = irecv[1];
1161     info->nz_unneeded  = irecv[2];
1162     info->memory       = irecv[3];
1163     info->mallocs      = irecv[4];
1164   } else if (flag == MAT_GLOBAL_SUM) {
1165     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1166     info->nz_used      = irecv[0];
1167     info->nz_allocated = irecv[1];
1168     info->nz_unneeded  = irecv[2];
1169     info->memory       = irecv[3];
1170     info->mallocs      = irecv[4];
1171   }
1172   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1173   info->fill_ratio_needed = 0;
1174   info->factor_mallocs    = 0;
1175   info->rows_global       = (double)matin->M;
1176   info->columns_global    = (double)matin->N;
1177   info->rows_local        = (double)matin->m;
1178   info->columns_local     = (double)matin->N;
1179 
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatSetOption_MPIAIJ"
1185 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1186 {
1187   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1188   PetscErrorCode ierr;
1189 
1190   PetscFunctionBegin;
1191   switch (op) {
1192   case MAT_NO_NEW_NONZERO_LOCATIONS:
1193   case MAT_YES_NEW_NONZERO_LOCATIONS:
1194   case MAT_COLUMNS_UNSORTED:
1195   case MAT_COLUMNS_SORTED:
1196   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1197   case MAT_KEEP_ZEROED_ROWS:
1198   case MAT_NEW_NONZERO_LOCATION_ERR:
1199   case MAT_USE_INODES:
1200   case MAT_DO_NOT_USE_INODES:
1201   case MAT_IGNORE_ZERO_ENTRIES:
1202     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1203     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1204     break;
1205   case MAT_ROW_ORIENTED:
1206     a->roworiented = PETSC_TRUE;
1207     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1208     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1209     break;
1210   case MAT_ROWS_SORTED:
1211   case MAT_ROWS_UNSORTED:
1212   case MAT_YES_NEW_DIAGONALS:
1213     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1214     break;
1215   case MAT_COLUMN_ORIENTED:
1216     a->roworiented = PETSC_FALSE;
1217     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1218     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1219     break;
1220   case MAT_IGNORE_OFF_PROC_ENTRIES:
1221     a->donotstash = PETSC_TRUE;
1222     break;
1223   case MAT_NO_NEW_DIAGONALS:
1224     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1225   case MAT_SYMMETRIC:
1226   case MAT_STRUCTURALLY_SYMMETRIC:
1227   case MAT_HERMITIAN:
1228   case MAT_SYMMETRY_ETERNAL:
1229     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1230     break;
1231   case MAT_NOT_SYMMETRIC:
1232   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1233   case MAT_NOT_HERMITIAN:
1234   case MAT_NOT_SYMMETRY_ETERNAL:
1235     break;
1236   default:
1237     SETERRQ(PETSC_ERR_SUP,"unknown option");
1238   }
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatGetRow_MPIAIJ"
1244 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1245 {
1246   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1247   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1248   PetscErrorCode ierr;
1249   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1250   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1251   PetscInt       *cmap,*idx_p;
1252 
1253   PetscFunctionBegin;
1254   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1255   mat->getrowactive = PETSC_TRUE;
1256 
1257   if (!mat->rowvalues && (idx || v)) {
1258     /*
1259         allocate enough space to hold information from the longest row.
1260     */
1261     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1262     PetscInt     max = 1,tmp;
1263     for (i=0; i<matin->m; i++) {
1264       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1265       if (max < tmp) { max = tmp; }
1266     }
1267     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1268     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1269   }
1270 
1271   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1272   lrow = row - rstart;
1273 
1274   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1275   if (!v)   {pvA = 0; pvB = 0;}
1276   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1277   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1278   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1279   nztot = nzA + nzB;
1280 
1281   cmap  = mat->garray;
1282   if (v  || idx) {
1283     if (nztot) {
1284       /* Sort by increasing column numbers, assuming A and B already sorted */
1285       PetscInt imark = -1;
1286       if (v) {
1287         *v = v_p = mat->rowvalues;
1288         for (i=0; i<nzB; i++) {
1289           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1290           else break;
1291         }
1292         imark = i;
1293         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1294         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1295       }
1296       if (idx) {
1297         *idx = idx_p = mat->rowindices;
1298         if (imark > -1) {
1299           for (i=0; i<imark; i++) {
1300             idx_p[i] = cmap[cworkB[i]];
1301           }
1302         } else {
1303           for (i=0; i<nzB; i++) {
1304             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1305             else break;
1306           }
1307           imark = i;
1308         }
1309         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1310         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1311       }
1312     } else {
1313       if (idx) *idx = 0;
1314       if (v)   *v   = 0;
1315     }
1316   }
1317   *nz = nztot;
1318   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1319   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1325 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1326 {
1327   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1328 
1329   PetscFunctionBegin;
1330   if (!aij->getrowactive) {
1331     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1332   }
1333   aij->getrowactive = PETSC_FALSE;
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatNorm_MPIAIJ"
1339 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1340 {
1341   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1342   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1343   PetscErrorCode ierr;
1344   PetscInt       i,j,cstart = aij->cstart;
1345   PetscReal      sum = 0.0;
1346   PetscScalar    *v;
1347 
1348   PetscFunctionBegin;
1349   if (aij->size == 1) {
1350     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1351   } else {
1352     if (type == NORM_FROBENIUS) {
1353       v = amat->a;
1354       for (i=0; i<amat->nz; i++) {
1355 #if defined(PETSC_USE_COMPLEX)
1356         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1357 #else
1358         sum += (*v)*(*v); v++;
1359 #endif
1360       }
1361       v = bmat->a;
1362       for (i=0; i<bmat->nz; i++) {
1363 #if defined(PETSC_USE_COMPLEX)
1364         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1365 #else
1366         sum += (*v)*(*v); v++;
1367 #endif
1368       }
1369       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1370       *norm = sqrt(*norm);
1371     } else if (type == NORM_1) { /* max column norm */
1372       PetscReal *tmp,*tmp2;
1373       PetscInt    *jj,*garray = aij->garray;
1374       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1375       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1376       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1377       *norm = 0.0;
1378       v = amat->a; jj = amat->j;
1379       for (j=0; j<amat->nz; j++) {
1380         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1381       }
1382       v = bmat->a; jj = bmat->j;
1383       for (j=0; j<bmat->nz; j++) {
1384         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1385       }
1386       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1387       for (j=0; j<mat->N; j++) {
1388         if (tmp2[j] > *norm) *norm = tmp2[j];
1389       }
1390       ierr = PetscFree(tmp);CHKERRQ(ierr);
1391       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1392     } else if (type == NORM_INFINITY) { /* max row norm */
1393       PetscReal ntemp = 0.0;
1394       for (j=0; j<aij->A->m; j++) {
1395         v = amat->a + amat->i[j];
1396         sum = 0.0;
1397         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1398           sum += PetscAbsScalar(*v); v++;
1399         }
1400         v = bmat->a + bmat->i[j];
1401         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1402           sum += PetscAbsScalar(*v); v++;
1403         }
1404         if (sum > ntemp) ntemp = sum;
1405       }
1406       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1407     } else {
1408       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1409     }
1410   }
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatTranspose_MPIAIJ"
1416 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1417 {
1418   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1419   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1420   PetscErrorCode ierr;
1421   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1422   Mat            B;
1423   PetscScalar    *array;
1424 
1425   PetscFunctionBegin;
1426   if (!matout && M != N) {
1427     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1428   }
1429 
1430   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1431   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1432   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1433 
1434   /* copy over the A part */
1435   Aloc = (Mat_SeqAIJ*)a->A->data;
1436   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1437   row = a->rstart;
1438   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1439   for (i=0; i<m; i++) {
1440     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1441     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1442   }
1443   aj = Aloc->j;
1444   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1445 
1446   /* copy over the B part */
1447   Aloc = (Mat_SeqAIJ*)a->B->data;
1448   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1449   row  = a->rstart;
1450   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1451   ct   = cols;
1452   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1453   for (i=0; i<m; i++) {
1454     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1455     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1456   }
1457   ierr = PetscFree(ct);CHKERRQ(ierr);
1458   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1459   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1460   if (matout) {
1461     *matout = B;
1462   } else {
1463     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1464   }
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1470 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1471 {
1472   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1473   Mat            a = aij->A,b = aij->B;
1474   PetscErrorCode ierr;
1475   PetscInt       s1,s2,s3;
1476 
1477   PetscFunctionBegin;
1478   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1479   if (rr) {
1480     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1481     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1482     /* Overlap communication with computation. */
1483     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1484   }
1485   if (ll) {
1486     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1487     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1488     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1489   }
1490   /* scale  the diagonal block */
1491   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1492 
1493   if (rr) {
1494     /* Do a scatter end and then right scale the off-diagonal block */
1495     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1496     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1497   }
1498 
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1505 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1506 {
1507   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1508   PetscErrorCode ierr;
1509 
1510   PetscFunctionBegin;
1511   if (!a->rank) {
1512     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1513   }
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1519 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1520 {
1521   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1522   PetscErrorCode ierr;
1523 
1524   PetscFunctionBegin;
1525   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1526   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 #undef __FUNCT__
1530 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1531 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1532 {
1533   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatEqual_MPIAIJ"
1543 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1544 {
1545   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1546   Mat            a,b,c,d;
1547   PetscTruth     flg;
1548   PetscErrorCode ierr;
1549 
1550   PetscFunctionBegin;
1551   a = matA->A; b = matA->B;
1552   c = matB->A; d = matB->B;
1553 
1554   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1555   if (flg) {
1556     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1557   }
1558   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatCopy_MPIAIJ"
1564 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1565 {
1566   PetscErrorCode ierr;
1567   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1568   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1569 
1570   PetscFunctionBegin;
1571   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1572   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1573     /* Because of the column compression in the off-processor part of the matrix a->B,
1574        the number of columns in a->B and b->B may be different; hence we cannot call
1575        MatCopy() directly on the two parts. If need be, we could provide a more
1576        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1577        and then copying the submatrices */
1578     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1579   } else {
1580     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1581     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1582   }
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1588 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1589 {
1590   PetscErrorCode ierr;
1591 
1592   PetscFunctionBegin;
1593   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 #include "petscblaslapack.h"
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatAXPY_MPIAIJ"
1600 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1601 {
1602   PetscErrorCode ierr;
1603   PetscInt       i;
1604   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1605   PetscBLASInt   bnz,one=1;
1606   Mat_SeqAIJ     *x,*y;
1607 
1608   PetscFunctionBegin;
1609   if (str == SAME_NONZERO_PATTERN) {
1610     x = (Mat_SeqAIJ *)xx->A->data;
1611     y = (Mat_SeqAIJ *)yy->A->data;
1612     bnz = (PetscBLASInt)x->nz;
1613     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1614     x = (Mat_SeqAIJ *)xx->B->data;
1615     y = (Mat_SeqAIJ *)yy->B->data;
1616     bnz = (PetscBLASInt)x->nz;
1617     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1618   } else if (str == SUBSET_NONZERO_PATTERN) {
1619     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1620 
1621     x = (Mat_SeqAIJ *)xx->B->data;
1622     y = (Mat_SeqAIJ *)yy->B->data;
1623     if (y->xtoy && y->XtoY != xx->B) {
1624       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1625       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1626     }
1627     if (!y->xtoy) { /* get xtoy */
1628       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1629       y->XtoY = xx->B;
1630     }
1631     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1632   } else {
1633     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1634   }
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 /* -------------------------------------------------------------------*/
1639 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1640        MatGetRow_MPIAIJ,
1641        MatRestoreRow_MPIAIJ,
1642        MatMult_MPIAIJ,
1643 /* 4*/ MatMultAdd_MPIAIJ,
1644        MatMultTranspose_MPIAIJ,
1645        MatMultTransposeAdd_MPIAIJ,
1646        0,
1647        0,
1648        0,
1649 /*10*/ 0,
1650        0,
1651        0,
1652        MatRelax_MPIAIJ,
1653        MatTranspose_MPIAIJ,
1654 /*15*/ MatGetInfo_MPIAIJ,
1655        MatEqual_MPIAIJ,
1656        MatGetDiagonal_MPIAIJ,
1657        MatDiagonalScale_MPIAIJ,
1658        MatNorm_MPIAIJ,
1659 /*20*/ MatAssemblyBegin_MPIAIJ,
1660        MatAssemblyEnd_MPIAIJ,
1661        0,
1662        MatSetOption_MPIAIJ,
1663        MatZeroEntries_MPIAIJ,
1664 /*25*/ MatZeroRows_MPIAIJ,
1665        0,
1666        0,
1667        0,
1668        0,
1669 /*30*/ MatSetUpPreallocation_MPIAIJ,
1670        0,
1671        0,
1672        0,
1673        0,
1674 /*35*/ MatDuplicate_MPIAIJ,
1675        0,
1676        0,
1677        0,
1678        0,
1679 /*40*/ MatAXPY_MPIAIJ,
1680        MatGetSubMatrices_MPIAIJ,
1681        MatIncreaseOverlap_MPIAIJ,
1682        MatGetValues_MPIAIJ,
1683        MatCopy_MPIAIJ,
1684 /*45*/ MatPrintHelp_MPIAIJ,
1685        MatScale_MPIAIJ,
1686        0,
1687        0,
1688        0,
1689 /*50*/ MatSetBlockSize_MPIAIJ,
1690        0,
1691        0,
1692        0,
1693        0,
1694 /*55*/ MatFDColoringCreate_MPIAIJ,
1695        0,
1696        MatSetUnfactored_MPIAIJ,
1697        0,
1698        0,
1699 /*60*/ MatGetSubMatrix_MPIAIJ,
1700        MatDestroy_MPIAIJ,
1701        MatView_MPIAIJ,
1702        MatGetPetscMaps_Petsc,
1703        0,
1704 /*65*/ 0,
1705        0,
1706        0,
1707        0,
1708        0,
1709 /*70*/ 0,
1710        0,
1711        MatSetColoring_MPIAIJ,
1712 #if defined(PETSC_HAVE_ADIC)
1713        MatSetValuesAdic_MPIAIJ,
1714 #else
1715        0,
1716 #endif
1717        MatSetValuesAdifor_MPIAIJ,
1718 /*75*/ 0,
1719        0,
1720        0,
1721        0,
1722        0,
1723 /*80*/ 0,
1724        0,
1725        0,
1726        0,
1727 /*84*/ MatLoad_MPIAIJ,
1728        0,
1729        0,
1730        0,
1731        0,
1732        0,
1733 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1734        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1735        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1736        MatPtAP_Basic,
1737        MatPtAPSymbolic_MPIAIJ,
1738 /*95*/ MatPtAPNumeric_MPIAIJ,
1739        0,
1740        0,
1741        0,
1742        0,
1743 /*100*/0,
1744        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1745        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1746 };
1747 
1748 /* ----------------------------------------------------------------------------------------*/
1749 
1750 EXTERN_C_BEGIN
1751 #undef __FUNCT__
1752 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1753 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1754 {
1755   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1756   PetscErrorCode ierr;
1757 
1758   PetscFunctionBegin;
1759   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1760   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1761   PetscFunctionReturn(0);
1762 }
1763 EXTERN_C_END
1764 
1765 EXTERN_C_BEGIN
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1768 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1769 {
1770   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1771   PetscErrorCode ierr;
1772 
1773   PetscFunctionBegin;
1774   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1775   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1776   PetscFunctionReturn(0);
1777 }
1778 EXTERN_C_END
1779 
1780 #include "petscpc.h"
1781 EXTERN_C_BEGIN
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1784 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1785 {
1786   Mat_MPIAIJ     *b;
1787   PetscErrorCode ierr;
1788   PetscInt       i;
1789 
1790   PetscFunctionBegin;
1791   B->preallocated = PETSC_TRUE;
1792   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1793   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1794   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1795   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1796   if (d_nnz) {
1797     for (i=0; i<B->m; i++) {
1798       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1799     }
1800   }
1801   if (o_nnz) {
1802     for (i=0; i<B->m; i++) {
1803       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1804     }
1805   }
1806   b = (Mat_MPIAIJ*)B->data;
1807   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1808   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1809 
1810   PetscFunctionReturn(0);
1811 }
1812 EXTERN_C_END
1813 
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1816 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1817 {
1818   Mat            mat;
1819   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1820   PetscErrorCode ierr;
1821 
1822   PetscFunctionBegin;
1823   *newmat       = 0;
1824   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1825   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1826   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1827   a    = (Mat_MPIAIJ*)mat->data;
1828 
1829   mat->factor       = matin->factor;
1830   mat->bs           = matin->bs;
1831   mat->assembled    = PETSC_TRUE;
1832   mat->insertmode   = NOT_SET_VALUES;
1833   mat->preallocated = PETSC_TRUE;
1834 
1835   a->rstart       = oldmat->rstart;
1836   a->rend         = oldmat->rend;
1837   a->cstart       = oldmat->cstart;
1838   a->cend         = oldmat->cend;
1839   a->size         = oldmat->size;
1840   a->rank         = oldmat->rank;
1841   a->donotstash   = oldmat->donotstash;
1842   a->roworiented  = oldmat->roworiented;
1843   a->rowindices   = 0;
1844   a->rowvalues    = 0;
1845   a->getrowactive = PETSC_FALSE;
1846 
1847   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1848   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1849   if (oldmat->colmap) {
1850 #if defined (PETSC_USE_CTABLE)
1851     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1852 #else
1853     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1854     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1855     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1856 #endif
1857   } else a->colmap = 0;
1858   if (oldmat->garray) {
1859     PetscInt len;
1860     len  = oldmat->B->n;
1861     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1862     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1863     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1864   } else a->garray = 0;
1865 
1866   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1867   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1868   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1869   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1870   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1871   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1872   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1873   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1874   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1875   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1876   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1877   *newmat = mat;
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 #include "petscsys.h"
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatLoad_MPIAIJ"
1885 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1886 {
1887   Mat            A;
1888   PetscScalar    *vals,*svals;
1889   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1890   MPI_Status     status;
1891   PetscErrorCode ierr;
1892   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1893   PetscInt       i,nz,j,rstart,rend,mmax;
1894   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1895   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1896   PetscInt       cend,cstart,n,*rowners;
1897   int            fd;
1898 
1899   PetscFunctionBegin;
1900   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1901   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1902   if (!rank) {
1903     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1904     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1905     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1906   }
1907 
1908   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1909   M = header[1]; N = header[2];
1910   /* determine ownership of all rows */
1911   m    = M/size + ((M % size) > rank);
1912   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1913   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1914 
1915   /* First process needs enough room for process with most rows */
1916   if (!rank) {
1917     mmax       = rowners[1];
1918     for (i=2; i<size; i++) {
1919       mmax = PetscMax(mmax,rowners[i]);
1920     }
1921   } else mmax = m;
1922 
1923   rowners[0] = 0;
1924   for (i=2; i<=size; i++) {
1925     mmax       = PetscMax(mmax,rowners[i]);
1926     rowners[i] += rowners[i-1];
1927   }
1928   rstart = rowners[rank];
1929   rend   = rowners[rank+1];
1930 
1931   /* distribute row lengths to all processors */
1932   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1933   if (!rank) {
1934     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1935     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1936     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1937     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1938     for (j=0; j<m; j++) {
1939       procsnz[0] += ourlens[j];
1940     }
1941     for (i=1; i<size; i++) {
1942       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1943       /* calculate the number of nonzeros on each processor */
1944       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1945         procsnz[i] += rowlengths[j];
1946       }
1947       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1948     }
1949     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1950   } else {
1951     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1952   }
1953 
1954   if (!rank) {
1955     /* determine max buffer needed and allocate it */
1956     maxnz = 0;
1957     for (i=0; i<size; i++) {
1958       maxnz = PetscMax(maxnz,procsnz[i]);
1959     }
1960     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1961 
1962     /* read in my part of the matrix column indices  */
1963     nz   = procsnz[0];
1964     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1965     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1966 
1967     /* read in everyone else's part and ship it off */
1968     for (i=1; i<size; i++) {
1969       nz   = procsnz[i];
1970       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1971       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1972     }
1973     ierr = PetscFree(cols);CHKERRQ(ierr);
1974   } else {
1975     /* determine buffer space needed for message */
1976     nz = 0;
1977     for (i=0; i<m; i++) {
1978       nz += ourlens[i];
1979     }
1980     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1981 
1982     /* receive message of column indices */
1983     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1984     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1985     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1986   }
1987 
1988   /* determine column ownership if matrix is not square */
1989   if (N != M) {
1990     n      = N/size + ((N % size) > rank);
1991     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1992     cstart = cend - n;
1993   } else {
1994     cstart = rstart;
1995     cend   = rend;
1996     n      = cend - cstart;
1997   }
1998 
1999   /* loop over local rows, determining number of off diagonal entries */
2000   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2001   jj = 0;
2002   for (i=0; i<m; i++) {
2003     for (j=0; j<ourlens[i]; j++) {
2004       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2005       jj++;
2006     }
2007   }
2008 
2009   /* create our matrix */
2010   for (i=0; i<m; i++) {
2011     ourlens[i] -= offlens[i];
2012   }
2013   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2014   ierr = MatSetType(A,type);CHKERRQ(ierr);
2015   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2016 
2017   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2018   for (i=0; i<m; i++) {
2019     ourlens[i] += offlens[i];
2020   }
2021 
2022   if (!rank) {
2023     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2024 
2025     /* read in my part of the matrix numerical values  */
2026     nz   = procsnz[0];
2027     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2028 
2029     /* insert into matrix */
2030     jj      = rstart;
2031     smycols = mycols;
2032     svals   = vals;
2033     for (i=0; i<m; i++) {
2034       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2035       smycols += ourlens[i];
2036       svals   += ourlens[i];
2037       jj++;
2038     }
2039 
2040     /* read in other processors and ship out */
2041     for (i=1; i<size; i++) {
2042       nz   = procsnz[i];
2043       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2044       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2045     }
2046     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2047   } else {
2048     /* receive numeric values */
2049     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2050 
2051     /* receive message of values */
2052     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2053     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2054     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2055 
2056     /* insert into matrix */
2057     jj      = rstart;
2058     smycols = mycols;
2059     svals   = vals;
2060     for (i=0; i<m; i++) {
2061       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2062       smycols += ourlens[i];
2063       svals   += ourlens[i];
2064       jj++;
2065     }
2066   }
2067   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2068   ierr = PetscFree(vals);CHKERRQ(ierr);
2069   ierr = PetscFree(mycols);CHKERRQ(ierr);
2070   ierr = PetscFree(rowners);CHKERRQ(ierr);
2071 
2072   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2073   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2074   *newmat = A;
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2080 /*
2081     Not great since it makes two copies of the submatrix: first a SeqAIJ
2082   copy on each process, and then the final result built by concatenating the local matrices.
2083   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2084 */
2085 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2086 {
2087   PetscErrorCode ierr;
2088   PetscMPIInt    rank,size;
2089   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2090   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2091   Mat            *local,M,Mreuse;
2092   PetscScalar    *vwork,*aa;
2093   MPI_Comm       comm = mat->comm;
2094   Mat_SeqAIJ     *aij;
2095 
2096 
2097   PetscFunctionBegin;
2098   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2099   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2100 
2101   if (call ==  MAT_REUSE_MATRIX) {
2102     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2103     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2104     local = &Mreuse;
2105     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2106   } else {
2107     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2108     Mreuse = *local;
2109     ierr   = PetscFree(local);CHKERRQ(ierr);
2110   }
2111 
2112   /*
2113       m - number of local rows
2114       n - number of columns (same on all processors)
2115       rstart - first row in new global matrix generated
2116   */
2117   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2118   if (call == MAT_INITIAL_MATRIX) {
2119     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2120     ii  = aij->i;
2121     jj  = aij->j;
2122 
2123     /*
2124         Determine the number of non-zeros in the diagonal and off-diagonal
2125         portions of the matrix in order to do correct preallocation
2126     */
2127 
2128     /* first get start and end of "diagonal" columns */
2129     if (csize == PETSC_DECIDE) {
2130       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2131       if (mglobal == n) { /* square matrix */
2132         nlocal = m;
2133       } else {
2134         nlocal = n/size + ((n % size) > rank);
2135       }
2136     } else {
2137       nlocal = csize;
2138     }
2139     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2140     rstart = rend - nlocal;
2141     if (rank == size - 1 && rend != n) {
2142       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2143     }
2144 
2145     /* next, compute all the lengths */
2146     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2147     olens = dlens + m;
2148     for (i=0; i<m; i++) {
2149       jend = ii[i+1] - ii[i];
2150       olen = 0;
2151       dlen = 0;
2152       for (j=0; j<jend; j++) {
2153         if (*jj < rstart || *jj >= rend) olen++;
2154         else dlen++;
2155         jj++;
2156       }
2157       olens[i] = olen;
2158       dlens[i] = dlen;
2159     }
2160     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2161     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2162     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2163     ierr = PetscFree(dlens);CHKERRQ(ierr);
2164   } else {
2165     PetscInt ml,nl;
2166 
2167     M = *newmat;
2168     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2169     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2170     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2171     /*
2172          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2173        rather than the slower MatSetValues().
2174     */
2175     M->was_assembled = PETSC_TRUE;
2176     M->assembled     = PETSC_FALSE;
2177   }
2178   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2179   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2180   ii  = aij->i;
2181   jj  = aij->j;
2182   aa  = aij->a;
2183   for (i=0; i<m; i++) {
2184     row   = rstart + i;
2185     nz    = ii[i+1] - ii[i];
2186     cwork = jj;     jj += nz;
2187     vwork = aa;     aa += nz;
2188     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2189   }
2190 
2191   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2192   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2193   *newmat = M;
2194 
2195   /* save submatrix used in processor for next request */
2196   if (call ==  MAT_INITIAL_MATRIX) {
2197     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2198     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2199   }
2200 
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 EXTERN_C_BEGIN
2205 #undef __FUNCT__
2206 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2207 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2208 {
2209   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2210   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2211   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2212   const PetscInt *JJ;
2213   PetscScalar    *values;
2214   PetscErrorCode ierr;
2215 
2216   PetscFunctionBegin;
2217 #if defined(PETSC_OPT_g)
2218   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2219 #endif
2220   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2221   o_nnz = d_nnz + m;
2222 
2223   for (i=0; i<m; i++) {
2224     nnz     = I[i+1]- I[i];
2225     JJ      = J + I[i];
2226     nnz_max = PetscMax(nnz_max,nnz);
2227 #if defined(PETSC_OPT_g)
2228     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2229 #endif
2230     for (j=0; j<nnz; j++) {
2231       if (*JJ >= cstart) break;
2232       JJ++;
2233     }
2234     d = 0;
2235     for (; j<nnz; j++) {
2236       if (*JJ++ >= cend) break;
2237       d++;
2238     }
2239     d_nnz[i] = d;
2240     o_nnz[i] = nnz - d;
2241   }
2242   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2243   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2244 
2245   if (v) values = (PetscScalar*)v;
2246   else {
2247     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2248     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2249   }
2250 
2251   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2252   for (i=0; i<m; i++) {
2253     ii   = i + rstart;
2254     nnz  = I[i+1]- I[i];
2255     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2256   }
2257   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2258   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2259   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2260 
2261   if (!v) {
2262     ierr = PetscFree(values);CHKERRQ(ierr);
2263   }
2264   PetscFunctionReturn(0);
2265 }
2266 EXTERN_C_END
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2270 /*@C
2271    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2272    (the default parallel PETSc format).
2273 
2274    Collective on MPI_Comm
2275 
2276    Input Parameters:
2277 +  A - the matrix
2278 .  i - the indices into j for the start of each local row (starts with zero)
2279 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2280 -  v - optional values in the matrix
2281 
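   Example usage (an illustrative sketch; A, ierr, and the literal row pointers, column
   indices, and values below are assumptions, not taken from this file):

.vb
     PetscInt    i[] = {0,2,4};              /* 2 local rows with 2 nonzeros each      */
     PetscInt    j[] = {0,5,1,6};            /* global column indices, sorted per row  */
     PetscScalar v[] = {1.0,2.0,3.0,4.0};    /* values corresponding to j[]            */
     ierr = MatMPIAIJSetPreallocationCSR(A,i,j,v);CHKERRQ(ierr);
.ve
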
2282    Level: developer
2283 
2284 .keywords: matrix, aij, compressed row, sparse, parallel
2285 
2286 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2287 @*/
2288 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2289 {
2290   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2291 
2292   PetscFunctionBegin;
2293   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2294   if (f) {
2295     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2296   }
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 #undef __FUNCT__
2301 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2302 /*@C
2303    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2304    (the default parallel PETSc format).  For good matrix assembly performance
2305    the user should preallocate the matrix storage by setting the parameters
2306    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2307    performance can be increased by more than a factor of 50.
2308 
2309    Collective on MPI_Comm
2310 
2311    Input Parameters:
2312 +  A - the matrix
2313 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2314            (same value is used for all local rows)
2315 .  d_nnz - array containing the number of nonzeros in the various rows of the
2316            DIAGONAL portion of the local submatrix (possibly different for each row)
2317            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2318            The size of this array is equal to the number of local rows, i.e., 'm'.
2319            You must leave room for the diagonal entry even if it is zero.
2320 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2321            submatrix (same value is used for all local rows).
2322 -  o_nnz - array containing the number of nonzeros in the various rows of the
2323            OFF-DIAGONAL portion of the local submatrix (possibly different for
2324            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2325            structure. The size of this array is equal to the number
2326            of local rows, i.e., 'm'.
2327 
2328    If the *_nnz parameter is given then the *_nz parameter is ignored
2329 
2330    The AIJ format (also called the Yale sparse matrix format or
2331    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2332    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2333 
2334    The parallel matrix is partitioned such that the first m0 rows belong to
2335    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2336    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2337 
2338    The DIAGONAL portion of the local submatrix of a processor can be defined
2339    as the submatrix which is obtained by extracting the part corresponding
2340    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2341    first row that belongs to the processor, and r2 is the last row belonging
2342    to this processor. This is a square mxm matrix. The remaining portion
2343    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2344 
2345    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2346 
2347    Example usage:
2348 
2349    Consider the following 8x8 matrix with 34 non-zero values, that is
2350    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2351    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2352    as follows:
2353 
2354 .vb
2355             1  2  0  |  0  3  0  |  0  4
2356     Proc0   0  5  6  |  7  0  0  |  8  0
2357             9  0 10  | 11  0  0  | 12  0
2358     -------------------------------------
2359            13  0 14  | 15 16 17  |  0  0
2360     Proc1   0 18  0  | 19 20 21  |  0  0
2361             0  0  0  | 22 23  0  | 24  0
2362     -------------------------------------
2363     Proc2  25 26 27  |  0  0 28  | 29  0
2364            30  0  0  | 31 32 33  |  0 34
2365 .ve
2366 
2367    This can be represented as a collection of submatrices as:
2368 
2369 .vb
2370       A B C
2371       D E F
2372       G H I
2373 .ve
2374 
2375    Where the submatrices A,B,C are owned by proc0, D,E,F are
2376    owned by proc1, G,H,I are owned by proc2.
2377 
2378    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2379    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2380    The 'M','N' parameters are 8,8, and have the same values on all procs.
2381 
2382    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2383    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2384    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2385    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2386    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2387    matrix, and [DF] as another SeqAIJ matrix.
2388 
2389    When d_nz, o_nz parameters are specified, d_nz storage elements are
2390    allocated for every row of the local diagonal submatrix, and o_nz
2391    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2392    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2393    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2394    In this case, the values of d_nz,o_nz are:
2395 .vb
2396      proc0 : dnz = 2, o_nz = 2
2397      proc1 : dnz = 3, o_nz = 2
2398      proc2 : dnz = 1, o_nz = 4
2399 .ve
2400    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2401    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2402    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2403    34 values.
2404 
2405    When d_nnz, o_nnz parameters are specified, the storage is specified
2406    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2407    In the above case the values for d_nnz,o_nnz are:
2408 .vb
2409      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2410      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2411      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2412 .ve
2413    Here the space allocated is the sum of all the above values, i.e., 34, and
2414    hence pre-allocation is perfect.
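
   As an illustrative sketch (assumed, not part of the original text; A and ierr are
   placeholder names), proc1 above could pass its per-row counts directly; the d_nz
   and o_nz arguments are then ignored:

.vb
     PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};   /* proc1's three local rows */
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve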
2415 
2416    Level: intermediate
2417 
2418 .keywords: matrix, aij, compressed row, sparse, parallel
2419 
2420 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2421           MPIAIJ
2422 @*/
2423 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2424 {
2425   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2426 
2427   PetscFunctionBegin;
2428   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2429   if (f) {
2430     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2431   }
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 #undef __FUNCT__
2436 #define __FUNCT__ "MatCreateMPIAIJ"
2437 /*@C
2438    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2439    (the default parallel PETSc format).  For good matrix assembly performance
2440    the user should preallocate the matrix storage by setting the parameters
2441    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2442    performance can be increased by more than a factor of 50.
2443 
2444    Collective on MPI_Comm
2445 
2446    Input Parameters:
2447 +  comm - MPI communicator
2448 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2449            This value should be the same as the local size used in creating the
2450            y vector for the matrix-vector product y = Ax.
2451 .  n - This value should be the same as the local size used in creating the
2452        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2453        calculated if N is given). For square matrices n is almost always m.
2454 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2455 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2456 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2457            (same value is used for all local rows)
2458 .  d_nnz - array containing the number of nonzeros in the various rows of the
2459            DIAGONAL portion of the local submatrix (possibly different for each row)
2460            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2461            The size of this array is equal to the number of local rows, i.e., 'm'.
2462            You must leave room for the diagonal entry even if it is zero.
2463 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2464            submatrix (same value is used for all local rows).
2465 -  o_nnz - array containing the number of nonzeros in the various rows of the
2466            OFF-DIAGONAL portion of the local submatrix (possibly different for
2467            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2468            structure. The size of this array is equal to the number
2469            of local rows, i.e., 'm'.
2470 
2471    Output Parameter:
2472 .  A - the matrix
2473 
2474    Notes:
2475    If the *_nnz parameter is given then the *_nz parameter is ignored
2476 
2477    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2478    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2479    storage requirements for this matrix.
2480 
2481    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2482    processor then it must be used on all processors that share the object for
2483    that argument.
2484 
2485    The user MUST specify either the local or global matrix dimensions
2486    (possibly both).
2487 
2488    The parallel matrix is partitioned such that the first m0 rows belong to
2489    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2490    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2491 
2492    The DIAGONAL portion of the local submatrix of a processor can be defined
2493    as the submatrix which is obtained by extracting the part corresponding
2494    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2495    first row that belongs to the processor, and r2 is the last row belonging
2496    to this processor. This is a square mxm matrix. The remaining portion
2497    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2498 
2499    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2500 
2501    When calling this routine with a single process communicator, a matrix of
2502    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2503    type of communicator, use the construction mechanism:
2504      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2505 
2506    By default, this format uses inodes (identical nodes) when possible.
2507    We search for consecutive rows with the same nonzero structure, thereby
2508    reusing matrix information to achieve increased efficiency.
2509 
2510    Options Database Keys:
2511 +  -mat_no_inode  - Do not use inodes
2512 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2513 -  -mat_aij_oneindex - Internally use indexing starting at 1
2514         rather than 0.  Note that when calling MatSetValues(),
2515         the user still MUST index entries starting at 0!
2516 
2517 
2518    Example usage:
2519 
2520    Consider the following 8x8 matrix with 34 non-zero values, that is
2521    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2522    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2523    as follows:
2524 
2525 .vb
2526             1  2  0  |  0  3  0  |  0  4
2527     Proc0   0  5  6  |  7  0  0  |  8  0
2528             9  0 10  | 11  0  0  | 12  0
2529     -------------------------------------
2530            13  0 14  | 15 16 17  |  0  0
2531     Proc1   0 18  0  | 19 20 21  |  0  0
2532             0  0  0  | 22 23  0  | 24  0
2533     -------------------------------------
2534     Proc2  25 26 27  |  0  0 28  | 29  0
2535            30  0  0  | 31 32 33  |  0 34
2536 .ve
2537 
2538    This can be represented as a collection of submatrices as:
2539 
2540 .vb
2541       A B C
2542       D E F
2543       G H I
2544 .ve
2545 
2546    Where the submatrices A,B,C are owned by proc0, D,E,F are
2547    owned by proc1, G,H,I are owned by proc2.
2548 
2549    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2550    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2551    The 'M','N' parameters are 8,8, and have the same values on all procs.
2552 
2553    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2554    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2555    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2556    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2557    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2558    matrix, and [DF] as another SeqAIJ matrix.
2559 
2560    When d_nz, o_nz parameters are specified, d_nz storage elements are
2561    allocated for every row of the local diagonal submatrix, and o_nz
2562    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2563    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2564    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2565    In this case, the values of d_nz,o_nz are:
2566 .vb
2567      proc0 : dnz = 2, o_nz = 2
2568      proc1 : dnz = 3, o_nz = 2
2569      proc2 : dnz = 1, o_nz = 4
2570 .ve
2571    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2572    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2573    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2574    34 values.
2575 
2576    When d_nnz, o_nnz parameters are specified, the storage is specified
2577    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2578    In the above case the values for d_nnz,o_nnz are:
2579 .vb
2580      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2581      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2582      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2583 .ve
2584    Here the space allocated is the sum of all the above values, i.e., 34, and
2585    hence pre-allocation is perfect.
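
   As an illustrative sketch (assumed, not part of the original text; comm, A, and ierr
   are placeholder names), proc1 above could create its share of the 8x8 matrix with:

.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};   /* proc1's three local rows */
     ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve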
2586 
2587    Level: intermediate
2588 
2589 .keywords: matrix, aij, compressed row, sparse, parallel
2590 
2591 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2592           MPIAIJ
2593 @*/
2594 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2595 {
2596   PetscErrorCode ierr;
2597   PetscMPIInt    size;
2598 
2599   PetscFunctionBegin;
2600   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2601   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2602   if (size > 1) {
2603     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2604     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2605   } else {
2606     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2607     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2608   }
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 #undef __FUNCT__
2613 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2614 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2615 {
2616   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2617 
2618   PetscFunctionBegin;
2619   *Ad     = a->A;
2620   *Ao     = a->B;
2621   *colmap = a->garray;
2622   PetscFunctionReturn(0);
2623 }
2624 
2625 #undef __FUNCT__
2626 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2627 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2628 {
2629   PetscErrorCode ierr;
2630   PetscInt       i;
2631   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2632 
2633   PetscFunctionBegin;
2634   if (coloring->ctype == IS_COLORING_LOCAL) {
2635     ISColoringValue *allcolors,*colors;
2636     ISColoring      ocoloring;
2637 
2638     /* set coloring for diagonal portion */
2639     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2640 
2641     /* set coloring for off-diagonal portion */
2642     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2643     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2644     for (i=0; i<a->B->n; i++) {
2645       colors[i] = allcolors[a->garray[i]];
2646     }
2647     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2648     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2649     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2650     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2651   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2652     ISColoringValue *colors;
2653     PetscInt        *larray;
2654     ISColoring      ocoloring;
2655 
2656     /* set coloring for diagonal portion */
2657     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2658     for (i=0; i<a->A->n; i++) {
2659       larray[i] = i + a->cstart;
2660     }
2661     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2662     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2663     for (i=0; i<a->A->n; i++) {
2664       colors[i] = coloring->colors[larray[i]];
2665     }
2666     ierr = PetscFree(larray);CHKERRQ(ierr);
2667     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2668     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2669     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2670 
2671     /* set coloring for off-diagonal portion */
2672     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2673     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2674     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2675     for (i=0; i<a->B->n; i++) {
2676       colors[i] = coloring->colors[larray[i]];
2677     }
2678     ierr = PetscFree(larray);CHKERRQ(ierr);
2679     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2680     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2681     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2682   } else {
2683     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2684   }
2685 
2686   PetscFunctionReturn(0);
2687 }
2688 
2689 #if defined(PETSC_HAVE_ADIC)
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2692 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2693 {
2694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2695   PetscErrorCode ierr;
2696 
2697   PetscFunctionBegin;
2698   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2699   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2700   PetscFunctionReturn(0);
2701 }
2702 #endif
2703 
2704 #undef __FUNCT__
2705 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2706 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2707 {
2708   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2709   PetscErrorCode ierr;
2710 
2711   PetscFunctionBegin;
2712   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2713   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2714   PetscFunctionReturn(0);
2715 }
2716 
2717 #undef __FUNCT__
2718 #define __FUNCT__ "MatMerge"
2719 /*@C
2720       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2721                  matrices from each processor
2722 
2723     Collective on MPI_Comm
2724 
2725    Input Parameters:
2726 +    comm - the communicator the parallel matrix will live on
2727 .    inmat - the input sequential matrices
2728 .    n - number of local columns (or PETSC_DECIDE)
2729 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2730 
2731    Output Parameter:
2732 .    outmat - the parallel matrix generated
2733 
2734     Level: advanced
2735 
2736    Notes: The number of columns of the matrix on EACH processor MUST be the same.
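
   A hedged usage sketch (the variable names are placeholders, not from an existing example):
   each process supplies a sequential matrix with the same number of columns and the pieces
   are stacked row-wise; note that the input matrix is destroyed by this routine.

.vb
     Mat localA,globalA;
     /* ... assemble localA as a SeqAIJ matrix on each process ... */
     ierr = MatMerge(PETSC_COMM_WORLD,localA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalA);CHKERRQ(ierr);
.ve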
2737 
2738 @*/
2739 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2740 {
2741   PetscErrorCode ierr;
2742   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2743   PetscInt       *indx;
2744   PetscScalar    *values;
2745   PetscMap       columnmap,rowmap;
2746 
2747   PetscFunctionBegin;
2748   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2749   /*
2750   PetscMPIInt       rank;
2751   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2752   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2753   */
2754   if (scall == MAT_INITIAL_MATRIX){
2755     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2756     if (n == PETSC_DECIDE){
2757       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2758       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2759       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2760       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2761       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2762     }
2763 
2764     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2765     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2766     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2767     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2768     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2769 
2770     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2771     for (i=0;i<m;i++) {
2772       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2773       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2774       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2775     }
2776     /* This routine will ONLY return MPIAIJ type matrix */
2777     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2778     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2779     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2780     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2781 
2782   } else if (scall == MAT_REUSE_MATRIX){
2783     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2784   } else {
2785     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2786   }
2787 
2788   for (i=0;i<m;i++) {
2789     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2790     I    = i + rstart;
2791     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2792     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2793   }
2794   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2795   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2796   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2797 
2798   PetscFunctionReturn(0);
2799 }
2800 
2801 #undef __FUNCT__
2802 #define __FUNCT__ "MatFileSplit"
2803 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2804 {
2805   PetscErrorCode    ierr;
2806   PetscMPIInt       rank;
2807   PetscInt          m,N,i,rstart,nnz;
2808   size_t            len;
2809   const PetscInt    *indx;
2810   PetscViewer       out;
2811   char              *name;
2812   Mat               B;
2813   const PetscScalar *values;
2814 
2815   PetscFunctionBegin;
2816   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2817   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2818   /* Should this be the type of the diagonal block of A? */
2819   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2820   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2821   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2822   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2823   for (i=0;i<m;i++) {
2824     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2825     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2826     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2827   }
2828   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2829   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2830 
2831   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2832   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2833   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2834   sprintf(name,"%s.%d",outfile,rank);
2835   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2836   ierr = PetscFree(name);CHKERRQ(ierr);
2837   ierr = MatView(B,out);CHKERRQ(ierr);
2838   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2839   ierr = MatDestroy(B);CHKERRQ(ierr);
2840   PetscFunctionReturn(0);
2841 }
2842 
2843 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2844 #undef __FUNCT__
2845 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2846 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2847 {
2848   PetscErrorCode       ierr;
2849   Mat_Merge_SeqsToMPI  *merge;
2850   PetscObjectContainer container;
2851 
2852   PetscFunctionBegin;
2853   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2854   if (container) {
2855     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2856     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2857     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2858     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2859     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2860     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2861     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2862     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2863     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2864     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2865     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2866     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2867 
2868     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2869     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2870     ierr = PetscFree(merge);CHKERRQ(ierr);
2871   }
2872 
2873   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2874   PetscFunctionReturn(0);
2875 }
2876 
2877 #include "src/mat/utils/freespace.h"
2878 #include "petscbt.h"
2879 #undef __FUNCT__
2880 #define __FUNCT__ "MatMerge_SeqsToMPI"
2881 /*@C
2882       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
2883                  matrices from each processor
2884 
2885     Collective on MPI_Comm
2886 
2887    Input Parameters:
2888 +    comm - the communicator the parallel matrix will live on
2889 .    seqmat - the input sequential matrix (one per process)
2890 .    m - number of local rows (or PETSC_DECIDE)
2891 .    n - number of local columns (or PETSC_DECIDE)
2892 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2893 
2894    Output Parameter:
2895 .    mpimat - the parallel matrix generated
2896 
2897     Level: advanced
2898 
2899    Notes:
2900      The dimensions of the sequential matrix in each processor MUST be the same.
2901      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2902      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
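   Example usage (a minimal sketch; assembly of the local seqmat is assumed and error checking is abbreviated):
.vb
   Mat seqmat,C;
   /* ... each process assembles its SeqAIJ matrix seqmat of identical dimensions ... */
   ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
   /* with the same nonzero pattern but new values in seqmat, reuse the parallel matrix */
   ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
   ierr = MatDestroy(C);CHKERRQ(ierr);
.ve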
2903 @*/
2904 static PetscEvent logkey_seqstompinum = 0;
2905 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2906 {
2907   PetscErrorCode       ierr;
2908   MPI_Comm             comm=mpimat->comm;
2909   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2910   PetscMPIInt          size,rank,taga,*len_s;
2911   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2912   PetscInt             proc,m;
2913   PetscInt             **buf_ri,**buf_rj;
2914   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2915   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2916   MPI_Request          *s_waits,*r_waits;
2917   MPI_Status           *status;
2918   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2919   Mat_Merge_SeqsToMPI  *merge;
2920   PetscObjectContainer container;
2921 
2922   PetscFunctionBegin;
2923   if (!logkey_seqstompinum) {
2924     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2925   }
2926   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2927 
2928   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2929   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2930 
2931   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2932   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()");
2933   ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2934 
2935   bi     = merge->bi;
2936   bj     = merge->bj;
2937   buf_ri = merge->buf_ri;
2938   buf_rj = merge->buf_rj;
2939 
2940   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2941   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2942   len_s  = merge->len_s;
2943 
2944   /* send and recv matrix values */
2945   /*-----------------------------*/
2946   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2947   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2948 
2949   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2950   for (proc=0,k=0; proc<size; proc++){
2951     if (!len_s[proc]) continue;
2952     i = owners[proc];
2953     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2954     k++;
2955   }
2956 
2957   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2958   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2959   ierr = PetscFree(status);CHKERRQ(ierr);
2960 
2961   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2962   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2963 
2964   /* insert mat values of mpimat */
2965   /*----------------------------*/
2966   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2967   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2968   nextrow = buf_ri_k + merge->nrecv;
2969   nextai  = nextrow + merge->nrecv;
2970 
2971   for (k=0; k<merge->nrecv; k++){
2972     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2973     nrows = *(buf_ri_k[k]);
2974     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2975     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure of the k-th received message */
2976   }
2977 
2978   /* set values of ba */
2979   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2980   for (i=0; i<m; i++) {
2981     arow = owners[rank] + i;
2982     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2983     bnzi = bi[i+1] - bi[i];
2984     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2985 
2986     /* add local non-zero vals of this proc's seqmat into ba */
2987     anzi = ai[arow+1] - ai[arow];
2988     aj   = a->j + ai[arow];
2989     aa   = a->a + ai[arow];
2990     nextaj = 0;
2991     for (j=0; nextaj<anzi; j++){
2992       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2993         ba_i[j] += aa[nextaj++];
2994       }
2995     }
2996 
2997     /* add received vals into ba */
2998     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2999       /* i-th row */
3000       if (i == *nextrow[k]) {
3001         anzi = *(nextai[k]+1) - *nextai[k];
3002         aj   = buf_rj[k] + *(nextai[k]);
3003         aa   = abuf_r[k] + *(nextai[k]);
3004         nextaj = 0;
3005         for (j=0; nextaj<anzi; j++){
3006           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3007             ba_i[j] += aa[nextaj++];
3008           }
3009         }
3010         nextrow[k]++; nextai[k]++;
3011       }
3012     }
3013     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3014   }
3015   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3016   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3017 
3018   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3019   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3020   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3021   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3022   PetscFunctionReturn(0);
3023 }
3024 static PetscEvent logkey_seqstompisym = 0;
3025 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3026 {
3027   PetscErrorCode       ierr;
3028   Mat                  B_mpi;
3029   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3030   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3031   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3032   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3033   PetscInt             len,proc,*dnz,*onz;
3034   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3035   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3036   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3037   MPI_Status           *status;
3038   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3039   PetscBT              lnkbt;
3040   Mat_Merge_SeqsToMPI  *merge;
3041   PetscObjectContainer container;
3042 
3043   PetscFunctionBegin;
3044   if (!logkey_seqstompisym) {
3045     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3046   }
3047   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3048 
3049   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3050   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3051 
3052   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3053   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3054 
3055   /* determine row ownership */
3056   /*---------------------------------------------------------*/
3057   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3058   if (m == PETSC_DECIDE) {
3059     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3060   } else {
3061     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3062   }
3063   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3064   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3065   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3066 
3067   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3068   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3069 
3070   /* determine the number of messages to send, their lengths */
3071   /*---------------------------------------------------------*/
3072   len_s  = merge->len_s;
3073 
3074   len = 0;  /* length of buf_si[] */
3075   merge->nsend = 0;
3076   for (proc=0; proc<size; proc++){
3077     len_si[proc] = 0;
3078     if (proc == rank){
3079       len_s[proc] = 0;
3080     } else {
3081       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3082       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3083     }
3084     if (len_s[proc]) {
3085       merge->nsend++;
3086       nrows = 0;
3087       for (i=owners[proc]; i<owners[proc+1]; i++){
3088         if (ai[i+1] > ai[i]) nrows++;
3089       }
3090       len_si[proc] = 2*(nrows+1);
3091       len += len_si[proc];
3092     }
3093   }
3094 
3095   /* determine the number and length of messages to receive for ij-structure */
3096   /*-------------------------------------------------------------------------*/
3097   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3098   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3099 
3100   /* post the Irecv of j-structure */
3101   /*-------------------------------*/
3102   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3103   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3104 
3105   /* post the Isend of j-structure */
3106   /*--------------------------------*/
3107   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3108   sj_waits = si_waits + merge->nsend;
3109 
3110   for (proc=0, k=0; proc<size; proc++){
3111     if (!len_s[proc]) continue;
3112     i = owners[proc];
3113     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3114     k++;
3115   }
3116 
3117   /* receives and sends of j-structure are complete */
3118   /*------------------------------------------------*/
3119   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3120   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3121 
3122   /* send and recv i-structure */
3123   /*---------------------------*/
3124   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3125   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3126 
3127   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3128   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3129   for (proc=0,k=0; proc<size; proc++){
3130     if (!len_s[proc]) continue;
3131     /* form outgoing message for i-structure:
3132          buf_si[0]:                 nrows to be sent
3133                [1:nrows]:           row index (local to the receiving process)
3134                [nrows+1:2*nrows+1]: i-structure index
3135     */
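    /* e.g., if 2 nonempty rows with receiver-local indices 3 and 7, holding 4 and 2 nonzeros
       respectively, are to be sent, then buf_si = [2, 3, 7, 0, 4, 6] (illustrative values) */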
3136     /*-------------------------------------------*/
3137     nrows = len_si[proc]/2 - 1;
3138     buf_si_i    = buf_si + nrows+1;
3139     buf_si[0]   = nrows;
3140     buf_si_i[0] = 0;
3141     nrows = 0;
3142     for (i=owners[proc]; i<owners[proc+1]; i++){
3143       anzi = ai[i+1] - ai[i];
3144       if (anzi) {
3145         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3146         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3147         nrows++;
3148       }
3149     }
3150     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3151     k++;
3152     buf_si += len_si[proc];
3153   }
3154 
3155   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3156   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3157 
3158   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3159   for (i=0; i<merge->nrecv; i++){
3160     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3161   }
3162 
3163   ierr = PetscFree(len_si);CHKERRQ(ierr);
3164   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3165   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3166   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3167   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3168   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3169   ierr = PetscFree(status);CHKERRQ(ierr);
3170 
3171   /* compute a local seq matrix in each processor */
3172   /*----------------------------------------------*/
3173   /* allocate bi array and free space for accumulating nonzero column info */
3174   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3175   bi[0] = 0;
3176 
3177   /* create and initialize a linked list */
3178   nlnk = N+1;
3179   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3180 
3181   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3182   len = 0;
3183   len  = ai[owners[rank+1]] - ai[owners[rank]];
3184   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3185   current_space = free_space;
3186 
3187   /* determine symbolic info for each local row */
3188   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3189   nextrow = buf_ri_k + merge->nrecv;
3190   nextai  = nextrow + merge->nrecv;
3191   for (k=0; k<merge->nrecv; k++){
3192     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3193     nrows = *buf_ri_k[k];
3194     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3195     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure of the k-th received message */
3196   }
3197 
3198   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3199   len = 0;
3200   for (i=0;i<m;i++) {
3201     bnzi   = 0;
3202     /* add local non-zero cols of this proc's seqmat into lnk */
3203     arow   = owners[rank] + i;
3204     anzi   = ai[arow+1] - ai[arow];
3205     aj     = a->j + ai[arow];
3206     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3207     bnzi += nlnk;
3208     /* add received col data into lnk */
3209     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3210       if (i == *nextrow[k]) { /* i-th row */
3211         anzi = *(nextai[k]+1) - *nextai[k];
3212         aj   = buf_rj[k] + *nextai[k];
3213         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3214         bnzi += nlnk;
3215         nextrow[k]++; nextai[k]++;
3216       }
3217     }
3218     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3219 
3220     /* if free space is not available, make more free space */
3221     if (current_space->local_remaining<bnzi) {
3222       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3223       nspacedouble++;
3224     }
3225     /* copy data into free space, then initialize lnk */
3226     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3227     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3228 
3229     current_space->array           += bnzi;
3230     current_space->local_used      += bnzi;
3231     current_space->local_remaining -= bnzi;
3232 
3233     bi[i+1] = bi[i] + bnzi;
3234   }
3235 
3236   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3237 
3238   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3239   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3240   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3241 
3242   /* create symbolic parallel matrix B_mpi */
3243   /*---------------------------------------*/
3244   if (n==PETSC_DECIDE) {
3245     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3246   } else {
3247     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3248   }
3249   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3250   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3251   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3252 
3253   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3254   B_mpi->assembled     = PETSC_FALSE;
3255   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3256   merge->bi            = bi;
3257   merge->bj            = bj;
3258   merge->buf_ri        = buf_ri;
3259   merge->buf_rj        = buf_rj;
3260   merge->coi           = PETSC_NULL;
3261   merge->coj           = PETSC_NULL;
3262   merge->owners_co     = PETSC_NULL;
3263 
3264   /* attach the supporting struct to B_mpi for reuse */
3265   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3266   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3267   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3268   *mpimat = B_mpi;
3269   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3270   PetscFunctionReturn(0);
3271 }
3272 
3273 static PetscEvent logkey_seqstompi = 0;
3274 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3275 {
3276   PetscErrorCode   ierr;
3277 
3278   PetscFunctionBegin;
3279   if (!logkey_seqstompi) {
3280     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3281   }
3282   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3283   if (scall == MAT_INITIAL_MATRIX){
3284     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3285   }
3286   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3287   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3288   PetscFunctionReturn(0);
3289 }
3290 static PetscEvent logkey_getlocalmat = 0;
3291 #undef __FUNCT__
3292 #define __FUNCT__ "MatGetLocalMat"
3293 /*@C
3294      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of the parallel matrix A
3295 
3296     Not Collective
3297 
3298    Input Parameters:
3299 +    A - the matrix
3300 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3301 
3302    Output Parameter:
3303 .    A_loc - the local sequential matrix generated
3304 
3305     Level: developer
3306 
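   Example usage (a minimal sketch; A is an assembled MATMPIAIJ matrix, error checking abbreviated):
.vb
   Mat A_loc;
   ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
   /* ... use A_loc as an ordinary SeqAIJ matrix with the global number of columns ... */
   ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr); /* refresh values after A changes */
   ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve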
3307 @*/
3308 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3309 {
3310   PetscErrorCode  ierr;
3311   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3312   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3313   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3314   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3315   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3316   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3317 
3318   PetscFunctionBegin;
3319   if (!logkey_getlocalmat) {
3320     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3321   }
3322   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3323   if (scall == MAT_INITIAL_MATRIX){
3324     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3325     ci[0] = 0;
3326     for (i=0; i<am; i++){
3327       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3328     }
3329     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3330     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3331     k = 0;
3332     for (i=0; i<am; i++) {
3333       ncols_o = bi[i+1] - bi[i];
3334       ncols_d = ai[i+1] - ai[i];
3335       /* off-diagonal portion of A */
3336       for (jo=0; jo<ncols_o; jo++) {
3337         col = cmap[*bj];
3338         if (col >= cstart) break;
3339         cj[k]   = col; bj++;
3340         ca[k++] = *ba++;
3341       }
3342       /* diagonal portion of A */
3343       for (j=0; j<ncols_d; j++) {
3344         cj[k]   = cstart + *aj++;
3345         ca[k++] = *aa++;
3346       }
3347       /* off-diagonal portion of A */
3348       for (j=jo; j<ncols_o; j++) {
3349         cj[k]   = cmap[*bj++];
3350         ca[k++] = *ba++;
3351       }
3352     }
3353     /* put together the new matrix */
3354     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3355     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3356     /* Since these are PETSc arrays, change flags to free them as necessary. */
3357     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3358     mat->freedata = PETSC_TRUE;
3359     mat->nonew    = 0;
3360   } else if (scall == MAT_REUSE_MATRIX){
3361     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3362     ci = mat->i; cj = mat->j; ca = mat->a;
3363     for (i=0; i<am; i++) {
3364       /* off-diagonal portion of A */
3365       ncols_o = bi[i+1] - bi[i];
3366       for (jo=0; jo<ncols_o; jo++) {
3367         col = cmap[*bj];
3368         if (col >= cstart) break;
3369         *ca++ = *ba++; bj++;
3370       }
3371       /* diagonal portion of A */
3372       ncols_d = ai[i+1] - ai[i];
3373       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3374       /* off-diagonal portion of A */
3375       for (j=jo; j<ncols_o; j++) {
3376         *ca++ = *ba++; bj++;
3377       }
3378     }
3379   } else {
3380     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3381   }
3382 
3383   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3384   PetscFunctionReturn(0);
3385 }
3386 
3387 static PetscEvent logkey_getlocalmatcondensed = 0;
3388 #undef __FUNCT__
3389 #define __FUNCT__ "MatGetLocalMatCondensed"
3390 /*@C
3391      MatGetLocalMatCondensed - Creates a SeqAIJ matrix containing all the local rows of the parallel matrix A, restricted to its NON-ZERO columns
3392 
3393     Not Collective
3394 
3395    Input Parameters:
3396 +    A - the matrix
3397 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3398 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3399 
3400    Output Parameter:
3401 .    A_loc - the local sequential matrix generated
3402 
3403     Level: developer
3404 
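   Example usage (a minimal sketch; A is an assembled MATMPIAIJ matrix, error checking abbreviated):
.vb
   Mat A_loc;
   ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
   /* A_loc holds the local rows of A restricted to the columns that have nonzeros on this process */
   ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve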
3405 @*/
3406 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3407 {
3408   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3409   PetscErrorCode    ierr;
3410   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3411   IS                isrowa,iscola;
3412   Mat               *aloc;
3413 
3414   PetscFunctionBegin;
3415   if (!logkey_getlocalmatcondensed) {
3416     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3417   }
3418   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3419   if (!row){
3420     start = a->rstart; end = a->rend;
3421     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3422   } else {
3423     isrowa = *row;
3424   }
3425   if (!col){
3426     start = a->cstart;
3427     cmap  = a->garray;
3428     nzA   = a->A->n;
3429     nzB   = a->B->n;
3430     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3431     ncols = 0;
3432     for (i=0; i<nzB; i++) {
3433       if (cmap[i] < start) idx[ncols++] = cmap[i];
3434       else break;
3435     }
3436     imark = i;
3437     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3438     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3439     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3440     ierr = PetscFree(idx);CHKERRQ(ierr);
3441   } else {
3442     iscola = *col;
3443   }
3444   if (scall != MAT_INITIAL_MATRIX){
3445     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3446     aloc[0] = *A_loc;
3447   }
3448   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3449   *A_loc = aloc[0];
3450   ierr = PetscFree(aloc);CHKERRQ(ierr);
3451   if (!row){
3452     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3453   }
3454   if (!col){
3455     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3456   }
3457   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3458   PetscFunctionReturn(0);
3459 }
3460 
3461 static PetscEvent logkey_GetBrowsOfAcols = 0;
3462 #undef __FUNCT__
3463 #define __FUNCT__ "MatGetBrowsOfAcols"
3464 /*@C
3465     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3466 
3467     Collective on Mat
3468 
3469    Input Parameters:
3470 +    A,B - the matrices in mpiaij format
3471 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3472 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3473 
3474    Output Parameter:
3475 +    rowb, colb - index sets of rows and columns of B to extract
3476 .    brstart - the row index of B_seq at which B's own local rows (the next B->m rows) begin
3477 -    B_seq - the sequential matrix generated
3478 
3479     Level: developer
3480 
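   Example usage (a minimal sketch; A and B are assembled MATMPIAIJ matrices with compatible
   layouts, error checking abbreviated):
.vb
   Mat      B_seq;
   IS       rowb,colb;
   PetscInt brstart;
   ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
   /* when only the numerical values of B change, reuse the index sets and the matrix */
   ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
   ierr = ISDestroy(rowb);CHKERRQ(ierr);
   ierr = ISDestroy(colb);CHKERRQ(ierr);
   ierr = MatDestroy(B_seq);CHKERRQ(ierr);
.ve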
3481 @*/
3482 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3483 {
3484   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3485   PetscErrorCode    ierr;
3486   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3487   IS                isrowb,iscolb;
3488   Mat               *bseq;
3489 
3490   PetscFunctionBegin;
3491   if (a->cstart != b->rstart || a->cend != b->rend){
3492     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3493   }
3494   if (!logkey_GetBrowsOfAcols) {
3495     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3496   }
3497   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3498 
3499   if (scall == MAT_INITIAL_MATRIX){
3500     start = a->cstart;
3501     cmap  = a->garray;
3502     nzA   = a->A->n;
3503     nzB   = a->B->n;
3504     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3505     ncols = 0;
3506     for (i=0; i<nzB; i++) {  /* row < local row index */
3507       if (cmap[i] < start) idx[ncols++] = cmap[i];
3508       else break;
3509     }
3510     imark = i;
3511     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3512     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3513     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3514     ierr = PetscFree(idx);CHKERRQ(ierr);
3515     *brstart = imark;
3516     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3517   } else {
3518     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3519     isrowb = *rowb; iscolb = *colb;
3520     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3521     bseq[0] = *B_seq;
3522   }
3523   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3524   *B_seq = bseq[0];
3525   ierr = PetscFree(bseq);CHKERRQ(ierr);
3526   if (!rowb){
3527     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3528   } else {
3529     *rowb = isrowb;
3530   }
3531   if (!colb){
3532     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3533   } else {
3534     *colb = iscolb;
3535   }
3536   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3537   PetscFunctionReturn(0);
3538 }
3539 
3540 static PetscEvent logkey_GetBrowsOfAocols = 0;
3541 #undef __FUNCT__
3542 #define __FUNCT__ "MatGetBrowsOfAoCols"
3543 /*@C
3544     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3545     of the OFF-DIAGONAL portion of local A
3546 
3547     Collective on Mat
3548 
3549    Input Parameters:
3550 +    A,B - the matrices in mpiaij format
3551 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3552 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3553 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3554 
3555    Output Parameter:
3556 .    B_oth - the sequential matrix generated
3557 
3558     Level: developer
3559 
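   Example usage (a minimal sketch; A and B are assembled MATMPIAIJ matrices with compatible
   layouts, error checking abbreviated):
.vb
   Mat         B_oth;
   PetscInt    *startsj;
   PetscScalar *bufa;
   ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
   /* when only the numerical values of B change, reuse the communication pattern */
   ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
   ierr = PetscFree(startsj);CHKERRQ(ierr);
   ierr = PetscFree(bufa);CHKERRQ(ierr);
   ierr = MatDestroy(B_oth);CHKERRQ(ierr);
.ve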
3560 @*/
3561 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3562 {
3563   VecScatter_MPI_General *gen_to,*gen_from;
3564   PetscErrorCode         ierr;
3565   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3566   Mat_SeqAIJ             *b_oth;
3567   VecScatter             ctx=a->Mvctx;
3568   MPI_Comm               comm=ctx->comm;
3569   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3570   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3571   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3572   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3573   MPI_Request            *rwaits,*swaits;
3574   MPI_Status             *sstatus,rstatus;
3575   PetscInt               *cols;
3576   PetscScalar            *vals;
3577   PetscMPIInt            j;
3578 
3579   PetscFunctionBegin;
3580   if (a->cstart != b->rstart || a->cend != b->rend){
3581     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3582   }
3583   if (!logkey_GetBrowsOfAocols) {
3584     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3585   }
3586   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3587   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3588 
3589   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3590   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3591   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3592   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3593   nrecvs   = gen_from->n;
3594   nsends   = gen_to->n;
3595   rwaits   = gen_from->requests;
3596   swaits   = gen_to->requests;
3597   srow     = gen_to->indices;   /* local row index to be sent */
3598   rstarts  = gen_from->starts;
3599   sstarts  = gen_to->starts;
3600   rprocs   = gen_from->procs;
3601   sprocs   = gen_to->procs;
3602   sstatus  = gen_to->sstatus;
3603 
3604   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3605   if (scall == MAT_INITIAL_MATRIX){
3606     /* i-array */
3607     /*---------*/
3608     /*  post receives */
3609     for (i=0; i<nrecvs; i++){
3610       rowlen = (PetscInt*)rvalues + rstarts[i];
3611       nrows = rstarts[i+1]-rstarts[i];
3612       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3613     }
3614 
3615     /* pack the outgoing message */
3616     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3617     rstartsj = sstartsj + nsends +1;
3618     sstartsj[0] = 0;  rstartsj[0] = 0;
3619     len = 0; /* total length of j or a array to be sent */
3620     k = 0;
3621     for (i=0; i<nsends; i++){
3622       rowlen = (PetscInt*)svalues + sstarts[i];
3623       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3624       for (j=0; j<nrows; j++) {
3625         row = srow[k] + b->rowners[rank]; /* global row idx */
3626         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3627         len += rowlen[j];
3628         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3629         k++;
3630       }
3631       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3632       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3633     }
3634     /* recvs and sends of i-array are completed */
3635     i = nrecvs;
3636     while (i--) {
3637       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3638     }
3639     if (nsends) {
3640       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3641     }
3642     /* allocate buffers for sending j and a arrays */
3643     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3644     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3645 
3646     /* create i-array of B_oth */
3647     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3648     b_othi[0] = 0;
3649     len = 0; /* total length of j or a array to be received */
3650     k = 0;
3651     for (i=0; i<nrecvs; i++){
3652       rowlen = (PetscInt*)rvalues + rstarts[i];
3653       nrows = rstarts[i+1]-rstarts[i];
3654       for (j=0; j<nrows; j++) {
3655         b_othi[k+1] = b_othi[k] + rowlen[j];
3656         len += rowlen[j]; k++;
3657       }
3658       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3659     }
3660 
3661     /* allocate space for j and a arrays of B_oth */
3662     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3663     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3664 
3665     /* j-array */
3666     /*---------*/
3667     /*  post receives of j-array */
3668     for (i=0; i<nrecvs; i++){
3669       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3670       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3671     }
3672     k = 0;
3673     for (i=0; i<nsends; i++){
3674       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3675       bufJ = bufj+sstartsj[i];
3676       for (j=0; j<nrows; j++) {
3677         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3678         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3679         for (l=0; l<ncols; l++){
3680           *bufJ++ = cols[l];
3681         }
3682         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3683       }
3684       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3685     }
3686 
3687     /* recvs and sends of j-array are completed */
3688     i = nrecvs;
3689     while (i--) {
3690       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3691     }
3692     if (nsends) {
3693       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3694     }
3695   } else if (scall == MAT_REUSE_MATRIX){
3696     sstartsj = *startsj;
3697     rstartsj = sstartsj + nsends +1;
3698     bufa     = *bufa_ptr;
3699     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3700     b_otha   = b_oth->a;
3701   } else {
3702     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3703   }
3704 
3705   /* a-array */
3706   /*---------*/
3707   /*  post receives of a-array */
3708   for (i=0; i<nrecvs; i++){
3709     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3710     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3711   }
3712   k = 0;
3713   for (i=0; i<nsends; i++){
3714     nrows = sstarts[i+1]-sstarts[i];
3715     bufA = bufa+sstartsj[i];
3716     for (j=0; j<nrows; j++) {
3717       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3718       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3719       for (l=0; l<ncols; l++){
3720         *bufA++ = vals[l];
3721       }
3722       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3723 
3724     }
3725     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3726   }
3727   /* recvs and sends of a-array are completed */
3728   i = nrecvs;
3729   while (i--) {
3730     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3731   }
3732   if (nsends) {
3733     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3734   }
3735 
3736   if (scall == MAT_INITIAL_MATRIX){
3737     /* put together the new matrix */
3738     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3739 
3740     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3741     /* Since these are PETSc arrays, change flags to free them as necessary. */
3742     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3743     b_oth->freedata = PETSC_TRUE;
3744     b_oth->nonew    = 0;
3745 
3746     ierr = PetscFree(bufj);CHKERRQ(ierr);
3747     if (!startsj || !bufa_ptr){
3748       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3749       ierr = PetscFree(bufa);CHKERRQ(ierr);
3750     } else {
3751       *startsj  = sstartsj;
3752       *bufa_ptr = bufa;
3753     }
3754   }
3755   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3756 
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 /*MC
3761    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3762 
3763    Options Database Keys:
3764 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3765 
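  Example usage (a minimal sketch; M, N and the preallocation numbers are illustrative):
.vb
   Mat A;
   ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve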
3766   Level: beginner
3767 
3768 .seealso: MatCreateMPIAIJ
3769 M*/
3770 
3771 EXTERN_C_BEGIN
3772 #undef __FUNCT__
3773 #define __FUNCT__ "MatCreate_MPIAIJ"
3774 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3775 {
3776   Mat_MPIAIJ     *b;
3777   PetscErrorCode ierr;
3778   PetscInt       i;
3779   PetscMPIInt    size;
3780 
3781   PetscFunctionBegin;
3782   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3783 
3784   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3785   B->data         = (void*)b;
3786   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3787   B->factor       = 0;
3788   B->bs           = 1;
3789   B->assembled    = PETSC_FALSE;
3790   B->mapping      = 0;
3791 
3792   B->insertmode      = NOT_SET_VALUES;
3793   b->size            = size;
3794   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3795 
3796   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3797   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3798 
3799   /* the information in the maps duplicates the information computed below, eventually
3800      we should remove the duplicate information that is not contained in the maps */
3801   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3802   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3803 
3804   /* build local table of row and column ownerships */
3805   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3806   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3807   b->cowners = b->rowners + b->size + 2;
3808   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3809   b->rowners[0] = 0;
3810   for (i=2; i<=b->size; i++) {
3811     b->rowners[i] += b->rowners[i-1];
3812   }
3813   b->rstart = b->rowners[b->rank];
3814   b->rend   = b->rowners[b->rank+1];
3815   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3816   b->cowners[0] = 0;
3817   for (i=2; i<=b->size; i++) {
3818     b->cowners[i] += b->cowners[i-1];
3819   }
3820   b->cstart = b->cowners[b->rank];
3821   b->cend   = b->cowners[b->rank+1];
3822 
3823   /* build cache for off array entries formed */
3824   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3825   b->donotstash  = PETSC_FALSE;
3826   b->colmap      = 0;
3827   b->garray      = 0;
3828   b->roworiented = PETSC_TRUE;
3829 
3830   /* stuff used for matrix vector multiply */
3831   b->lvec      = PETSC_NULL;
3832   b->Mvctx     = PETSC_NULL;
3833 
3834   /* stuff for MatGetRow() */
3835   b->rowindices   = 0;
3836   b->rowvalues    = 0;
3837   b->getrowactive = PETSC_FALSE;
3838 
3839   /* Explicitly create 2 MATSEQAIJ matrices. */
3840   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3841   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3842   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3843   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3844   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3845   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3846 
3847   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3848                                      "MatStoreValues_MPIAIJ",
3849                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3850   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3851                                      "MatRetrieveValues_MPIAIJ",
3852                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3853   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3854 				     "MatGetDiagonalBlock_MPIAIJ",
3855                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3856   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3857 				     "MatIsTranspose_MPIAIJ",
3858 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3859   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3860 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3861 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3862   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3863 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3864 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3865   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3866 				     "MatDiagonalScaleLocal_MPIAIJ",
3867 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3868   PetscFunctionReturn(0);
3869 }
3870 EXTERN_C_END
3871