xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 97c2bf28463f5cea7c8d86019a1388ec46183b29)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local column number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
9 a slightly higher hash-table lookup cost; without it, it is not scalable (each
10 processor stores an order-N integer array) but access is fast.
11 */
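/*
  An illustrative example (added sketch, not code used by this file): suppose the
  off-diagonal block B has garray = {3,17,42}, so global column 17 is local column
  1 of B.  With PETSC_USE_CTABLE the table stores key 17+1 -> data 1+1, and a
  lookup mirrors the code in MatSetValues_MPIAIJ() below:

      ierr = PetscTableFind(aij->colmap,17+1,&col);CHKERRQ(ierr);
      col--;

  after which col == 1; a negative col means the column was not found.  Without
  the table, colmap[17] == 2, and colmap[g] == 0 (i.e. -1 after subtracting 1)
  marks a global column g that does not appear in the off-diagonal part.
*/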
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   int        n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(int));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
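/*
  The two macros below are hand-inlined versions of MatSetValues_SeqAIJ() for the
  diagonal block (A) and the off-diagonal block (B).  Each searches the target row
  for the column (a binary search narrowed by a short linear scan), adds to or
  overwrites the entry according to addv, and, if the row has no free slot left,
  enlarges the matrix storage by CHUNKSIZE entries with a single new malloc that
  holds i, j and a together.  They rely on the local variables set up in
  MatSetValues_MPIAIJ() below (rp, ap, rmax, nrow, nonew, shift, ...).
*/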
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (int*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(int));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (int*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(int));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar  value;
190   PetscErrorCode ierr;
191   int          i,j,rstart = aij->rstart,rend = aij->rend;
192   int          cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth   roworiented = aij->roworiented;
194 
195   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
196   Mat          A = aij->A;
197   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
198   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar  *aa = a->a;
200   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat          B = aij->B;
202   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
203   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar  *ba = b->a;
205 
206   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   int          nonew = a->nonew,shift=0;
208   PetscScalar  *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236             col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
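/*
  Usage sketch (illustrative only; the indices and value are made up):

      PetscScalar v   = 1.0;
      int         row = 0,col = 5;
      ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Rows owned by this process are inserted directly into aij->A or aij->B above;
  rows owned by another process are stashed and communicated during assembly (see
  MatAssemblyBegin_MPIAIJ() and MatAssemblyEnd_MPIAIJ() below).
*/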
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   int        i,j,rstart = aij->rstart,rend = aij->rend;
278   int        cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col--;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   int         nstash,reallocs;
322   InsertMode  addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode; a bitwise OR of the modes detects a mix */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
348   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
349   PetscErrorCode ierr;
350   int         i,j,rstart,ncols,n,flg;
351   int         *row,*col,other_disassembled;
352   PetscScalar *val;
353   InsertMode  addv = mat->insertmode;
354 
355   PetscFunctionBegin;
356   if (!aij->donotstash) {
357     while (1) {
358       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
359       if (!flg) break;
360 
361       for (i=0; i<n;) {
362         /* Now identify the consecutive vals belonging to the same row */
363         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
364         if (j < n) ncols = j-i;
365         else       ncols = n-i;
366         /* Now assemble all these values with a single function call */
367         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
368         i = j;
369       }
370     }
371     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
372   }
373 
374   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
375   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
376 
377   /* determine if any processor has disassembled; if so we must
378      also disassemble ourselves, so that we may reassemble. */
379   /*
380      if the nonzero structure of submatrix B cannot change then we know that
381      no processor disassembled, so we can skip this step
382   */
383   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
384     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
385     if (mat->was_assembled && !other_disassembled) {
386       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
387     }
388   }
389 
390   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
391     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
392   }
393   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
395 
396   if (aij->rowvalues) {
397     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
398     aij->rowvalues = 0;
399   }
400 
401   /* used by MatAXPY() */
402   a->xtoy = 0; b->xtoy = 0;
403   a->XtoY = 0; b->XtoY = 0;
404 
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
410 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
411 {
412   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
413   PetscErrorCode ierr;
414 
415   PetscFunctionBegin;
416   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
417   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatZeroRows_MPIAIJ"
423 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
424 {
425   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
426   PetscErrorCode ierr;
427   int            i,N,*rows,*owners = l->rowners,size = l->size;
428   int            *nprocs,j,idx,nsends,row;
429   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
430   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
431   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
432   MPI_Comm       comm = A->comm;
433   MPI_Request    *send_waits,*recv_waits;
434   MPI_Status     recv_status,*send_status;
435   IS             istmp;
436   PetscTruth     found;
437 
438   PetscFunctionBegin;
439   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
440   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
441 
442   /*  first count number of contributors to each processor */
443   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
444   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
445   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
446   for (i=0; i<N; i++) {
447     idx = rows[i];
448     found = PETSC_FALSE;
449     for (j=0; j<size; j++) {
450       if (idx >= owners[j] && idx < owners[j+1]) {
451         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
452       }
453     }
454     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
455   }
456   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
457 
458   /* inform other processors of number of messages and max length*/
459   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
460 
461   /* post receives:   */
462   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
463   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
464   for (i=0; i<nrecvs; i++) {
465     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
466   }
467 
468   /* do sends:
469       1) starts[i] gives the starting index in svalues for stuff going to
470          the ith processor
471   */
472   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
474   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
475   starts[0] = 0;
476   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
477   for (i=0; i<N; i++) {
478     svalues[starts[owner[i]]++] = rows[i];
479   }
480   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
481 
482   starts[0] = 0;
483   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
484   count = 0;
485   for (i=0; i<size; i++) {
486     if (nprocs[2*i+1]) {
487       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
488     }
489   }
490   ierr = PetscFree(starts);CHKERRQ(ierr);
491 
492   base = owners[rank];
493 
494   /*  wait on receives */
495   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
496   source = lens + nrecvs;
497   count  = nrecvs; slen = 0;
498   while (count) {
499     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
500     /* unpack receives into our local space */
501     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
502     source[imdex]  = recv_status.MPI_SOURCE;
503     lens[imdex]    = n;
504     slen          += n;
505     count--;
506   }
507   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
508 
509   /* move the data into the send scatter */
510   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
511   count = 0;
512   for (i=0; i<nrecvs; i++) {
513     values = rvalues + i*nmax;
514     for (j=0; j<lens[i]; j++) {
515       lrows[count++] = values[j] - base;
516     }
517   }
518   ierr = PetscFree(rvalues);CHKERRQ(ierr);
519   ierr = PetscFree(lens);CHKERRQ(ierr);
520   ierr = PetscFree(owner);CHKERRQ(ierr);
521   ierr = PetscFree(nprocs);CHKERRQ(ierr);
522 
523   /* actually zap the local rows */
524   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
525   PetscLogObjectParent(A,istmp);
526 
527   /*
528         Zero the required rows. If the "diagonal block" of the matrix
529      is square and the user wishes to set the diagonal, we use separate
530      code so that MatSetValues() is not called for each diagonal entry,
531      which would trigger many mallocs and slow things down.
532 
533        Contributed by: Mathew Knepley
534   */
535   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
536   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
537   if (diag && (l->A->M == l->A->N)) {
538     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
539   } else if (diag) {
540     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
541     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
542       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
544     }
545     for (i = 0; i < slen; i++) {
546       row  = lrows[i] + rstart;
547       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
548     }
549     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551   } else {
552     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
553   }
554   ierr = ISDestroy(istmp);CHKERRQ(ierr);
555   ierr = PetscFree(lrows);CHKERRQ(ierr);
556 
557   /* wait on sends */
558   if (nsends) {
559     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
560     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
561     ierr = PetscFree(send_status);CHKERRQ(ierr);
562   }
563   ierr = PetscFree(send_waits);CHKERRQ(ierr);
564   ierr = PetscFree(svalues);CHKERRQ(ierr);
565 
566   PetscFunctionReturn(0);
567 }
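/*
  Usage sketch (illustrative only; the row indices and diagonal value are made up):

      IS          is;
      int         rows[2] = {0,7};
      PetscScalar diag    = 1.0;
      ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);CHKERRQ(ierr);
      ierr = MatZeroRows(A,is,&diag);CHKERRQ(ierr);
      ierr = ISDestroy(is);CHKERRQ(ierr);

  The rows may be owned by any process; the routine above ships each index to its
  owner before the local rows are zeroed.
*/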
568 
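/*
  The parallel products below use the standard MPIAIJ splitting: with x_ghost the
  off-process values of xx gathered into a->lvec by the a->Mvctx scatter,

      y = A_d * x_local + A_o * x_ghost

  where A_d = a->A is the diagonal block and A_o = a->B is the off-diagonal block
  (stored with compressed local column numbering).  The scatter is started before
  the local product so that communication overlaps computation.
*/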
569 #undef __FUNCT__
570 #define __FUNCT__ "MatMult_MPIAIJ"
571 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
572 {
573   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
574   PetscErrorCode ierr;
575   int        nt;
576 
577   PetscFunctionBegin;
578   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
579   if (nt != A->n) {
580     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
581   }
582   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
583   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
584   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatMultAdd_MPIAIJ"
591 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
592 {
593   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
594   PetscErrorCode ierr;
595 
596   PetscFunctionBegin;
597   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
599   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
601   PetscFunctionReturn(0);
602 }
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
606 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
607 {
608   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
609   PetscErrorCode ierr;
610 
611   PetscFunctionBegin;
612   /* do nondiagonal part */
613   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
614   /* send it on its way */
615   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
616   /* do local part */
617   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
618   /* receive remote parts: note this assumes the values are not actually   */
619   /* inserted into yy until VecScatterEnd() below, which holds for this    */
620   /* implementation but may not always be true.                            */
621   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 EXTERN_C_BEGIN
626 #undef __FUNCT__
627 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
628 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f)
629 {
630   MPI_Comm comm;
631   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
632   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
633   IS         Me,Notme;
634   PetscErrorCode ierr;
635   int        M,N,first,last,*notme,ntids,i;
636 
637   PetscFunctionBegin;
638 
639   /* Easy test: symmetric diagonal block */
640   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
641   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
642   if (!*f) PetscFunctionReturn(0);
643   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
644   ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
645   if (ntids==1) PetscFunctionReturn(0);
646 
647   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
648   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
649   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
650   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
651   for (i=0; i<first; i++) notme[i] = i;
652   for (i=last; i<M; i++) notme[i-last+first] = i;
653   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
654   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
655   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
656   Aoff = Aoffs[0];
657   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
658   Boff = Boffs[0];
659   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
660   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
661   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
662   ierr = ISDestroy(Me);CHKERRQ(ierr);
663   ierr = ISDestroy(Notme);CHKERRQ(ierr);
664 
665   PetscFunctionReturn(0);
666 }
667 EXTERN_C_END
668 
669 #undef __FUNCT__
670 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
671 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
672 {
673   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
674   PetscErrorCode ierr;
675 
676   PetscFunctionBegin;
677   /* do nondiagonal part */
678   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
679   /* send it on its way */
680   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
681   /* do local part */
682   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
683   /* receive remote parts: note this assumes the values are not actually   */
684   /* inserted into zz until VecScatterEnd() below, which holds for this    */
685   /* implementation but may not always be true.                            */
686   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 /*
691   This only works correctly for square matrices where the subblock A->A is the
692    diagonal block
693 */
694 #undef __FUNCT__
695 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
696 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
697 {
698   PetscErrorCode ierr;
699   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
700 
701   PetscFunctionBegin;
702   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
703   if (a->rstart != a->cstart || a->rend != a->cend) {
704     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
705   }
706   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatScale_MPIAIJ"
712 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
713 {
714   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
719   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatDestroy_MPIAIJ"
725 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
726 {
727   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
728   PetscErrorCode ierr;
729 
730   PetscFunctionBegin;
731 #if defined(PETSC_USE_LOG)
732   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
733 #endif
734   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
735   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
736   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
737   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
738 #if defined (PETSC_USE_CTABLE)
739   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
740 #else
741   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
742 #endif
743   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
744   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
745   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
746   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
747   ierr = PetscFree(aij);CHKERRQ(ierr);
748   PetscFunctionReturn(0);
749 }
750 
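/*
  Binary layout written by the routine below (the file is written by process 0,
  which receives the other processes' pieces one at a time):

      header[4]      = {MAT_FILE_COOKIE, M, N, global number of nonzeros}
      row lengths    (one int per global row)
      column indices (global numbering, row by row)
      values         (PetscScalar, in the same order as the column indices)
*/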
751 #undef __FUNCT__
752 #define __FUNCT__ "MatView_MPIAIJ_Binary"
753 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
754 {
755   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
756   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
757   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
758   PetscErrorCode    ierr;
759   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
760   int               fd;
761   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
762   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
763   PetscScalar       *column_values;
764 
765   PetscFunctionBegin;
766   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
767   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
768   nz   = A->nz + B->nz;
769   if (!rank) {
770     header[0] = MAT_FILE_COOKIE;
771     header[1] = mat->M;
772     header[2] = mat->N;
773     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
774     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
775     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
776     /* get largest number of rows any processor has */
777     rlen = mat->m;
778     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
779     for (i=1; i<size; i++) {
780       rlen = PetscMax(rlen,range[i+1] - range[i]);
781     }
782   } else {
783     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
784     rlen = mat->m;
785   }
786 
787   /* load up the local row counts */
788   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
789   for (i=0; i<mat->m; i++) {
790     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
791   }
792 
793   /* store the row lengths to the file */
794   if (!rank) {
795     MPI_Status status;
796     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
797     for (i=1; i<size; i++) {
798       rlen = range[i+1] - range[i];
799       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
800       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
801     }
802   } else {
803     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
804   }
805   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
806 
807   /* load up the local column indices */
808   nzmax = nz; /* process 0 needs as much space as the largest number of nonzeros on any process */
809   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
810   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
811   cnt  = 0;
812   for (i=0; i<mat->m; i++) {
813     for (j=B->i[i]; j<B->i[i+1]; j++) {
814       if ( (col = garray[B->j[j]]) > cstart) break;
815       column_indices[cnt++] = col;
816     }
817     for (k=A->i[i]; k<A->i[i+1]; k++) {
818       column_indices[cnt++] = A->j[k] + cstart;
819     }
820     for (; j<B->i[i+1]; j++) {
821       column_indices[cnt++] = garray[B->j[j]];
822     }
823   }
824   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
825 
826   /* store the column indices to the file */
827   if (!rank) {
828     MPI_Status status;
829     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
830     for (i=1; i<size; i++) {
831       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
832       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %d nzmax = %d",nz,nzmax);
833       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
834       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
835     }
836   } else {
837     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
838     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
839   }
840   ierr = PetscFree(column_indices);CHKERRQ(ierr);
841 
842   /* load up the local column values */
843   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
844   cnt  = 0;
845   for (i=0; i<mat->m; i++) {
846     for (j=B->i[i]; j<B->i[i+1]; j++) {
847       if ( garray[B->j[j]] > cstart) break;
848       column_values[cnt++] = B->a[j];
849     }
850     for (k=A->i[i]; k<A->i[i+1]; k++) {
851       column_values[cnt++] = A->a[k];
852     }
853     for (; j<B->i[i+1]; j++) {
854       column_values[cnt++] = B->a[j];
855     }
856   }
857   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
858 
859   /* store the column values to the file */
860   if (!rank) {
861     MPI_Status status;
862     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
863     for (i=1; i<size; i++) {
864       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
865       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
866       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
867       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
868     }
869   } else {
870     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
871     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
872   }
873   ierr = PetscFree(column_values);CHKERRQ(ierr);
874   PetscFunctionReturn(0);
875 }
876 
877 #undef __FUNCT__
878 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
879 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
880 {
881   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
882   PetscErrorCode    ierr;
883   PetscMPIInt       rank = aij->rank,size = aij->size;
884   PetscTruth        isdraw,iascii,flg,isbinary;
885   PetscViewer       sviewer;
886   PetscViewerFormat format;
887 
888   PetscFunctionBegin;
889   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
890   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
891   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
892   if (iascii) {
893     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
894     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
895       MatInfo info;
896       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
897       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
899       if (flg) {
900         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
901 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
902       } else {
903         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
904 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
905       }
906       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
907       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
908       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
909       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
910       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
911       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
912       PetscFunctionReturn(0);
913     } else if (format == PETSC_VIEWER_ASCII_INFO) {
914       PetscFunctionReturn(0);
915     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
916       PetscFunctionReturn(0);
917     }
918   } else if (isbinary) {
919     if (size == 1) {
920       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
921       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
922     } else {
923       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
924     }
925     PetscFunctionReturn(0);
926   } else if (isdraw) {
927     PetscDraw  draw;
928     PetscTruth isnull;
929     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
930     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
931   }
932 
933   if (size == 1) {
934     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
935     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
936   } else {
937     /* assemble the entire matrix onto first processor. */
938     Mat         A;
939     Mat_SeqAIJ *Aloc;
940     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
941     PetscScalar *a;
942 
943     if (!rank) {
944       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
945     } else {
946       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
947     }
948     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
949     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
950     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
951     PetscLogObjectParent(mat,A);
952 
953     /* copy over the A part */
954     Aloc = (Mat_SeqAIJ*)aij->A->data;
955     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
956     row = aij->rstart;
957     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
958     for (i=0; i<m; i++) {
959       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
960       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
961     }
962     aj = Aloc->j;
963     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
964 
965     /* copy over the B part */
966     Aloc = (Mat_SeqAIJ*)aij->B->data;
967     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
968     row  = aij->rstart;
969     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
970     ct   = cols;
971     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
972     for (i=0; i<m; i++) {
973       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
974       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
975     }
976     ierr = PetscFree(ct);CHKERRQ(ierr);
977     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
979     /*
980        Everyone has to participate in the viewing call since the graphics waits
981        are synchronized across all processors that share the PetscDraw object
982     */
983     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
984     if (!rank) {
985       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
986       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
987     }
988     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
989     ierr = MatDestroy(A);CHKERRQ(ierr);
990   }
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatView_MPIAIJ"
996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997 {
998   PetscErrorCode ierr;
999   PetscTruth iascii,isdraw,issocket,isbinary;
1000 
1001   PetscFunctionBegin;
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1003   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1005   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1006   if (iascii || isdraw || isbinary || issocket) {
1007     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1008   } else {
1009     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 
1015 
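/*
  "Local" SOR/relaxation: only the diagonal block A_d = mat->A is relaxed; the
  off-diagonal block contributes through the right-hand side.  Each outer
  iteration gathers the current ghost values x_ghost and forms

      bb1 = bb - A_o * x_ghost

  (a VecScale by -1 on lvec followed by a MatMultAdd), then applies the sequential
  relaxation kernel of A_d to bb1.  A true global parallel SOR is not supported.
*/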
1016 #undef __FUNCT__
1017 #define __FUNCT__ "MatRelax_MPIAIJ"
1018 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1019 {
1020   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1021   PetscErrorCode ierr;
1022   Vec          bb1;
1023   PetscScalar  mone=-1.0;
1024 
1025   PetscFunctionBegin;
1026   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1027 
1028   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1029 
1030   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1031     if (flag & SOR_ZERO_INITIAL_GUESS) {
1032       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1033       its--;
1034     }
1035 
1036     while (its--) {
1037       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1038       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1039 
1040       /* update rhs: bb1 = bb - B*x */
1041       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1042       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1043 
1044       /* local sweep */
1045       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1046       CHKERRQ(ierr);
1047     }
1048   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1049     if (flag & SOR_ZERO_INITIAL_GUESS) {
1050       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1051       its--;
1052     }
1053     while (its--) {
1054       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1055       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1056 
1057       /* update rhs: bb1 = bb - B*x */
1058       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1059       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1060 
1061       /* local sweep */
1062       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1063       CHKERRQ(ierr);
1064     }
1065   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1066     if (flag & SOR_ZERO_INITIAL_GUESS) {
1067       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1068       its--;
1069     }
1070     while (its--) {
1071       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1072       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1073 
1074       /* update rhs: bb1 = bb - B*x */
1075       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1076       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1077 
1078       /* local sweep */
1079       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1080       CHKERRQ(ierr);
1081     }
1082   } else {
1083     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1084   }
1085 
1086   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1092 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1093 {
1094   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1095   Mat        A = mat->A,B = mat->B;
1096   PetscErrorCode ierr;
1097   PetscReal  isend[5],irecv[5];
1098 
1099   PetscFunctionBegin;
1100   info->block_size     = 1.0;
1101   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1102   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1103   isend[3] = info->memory;  isend[4] = info->mallocs;
1104   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1105   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1106   isend[3] += info->memory;  isend[4] += info->mallocs;
1107   if (flag == MAT_LOCAL) {
1108     info->nz_used      = isend[0];
1109     info->nz_allocated = isend[1];
1110     info->nz_unneeded  = isend[2];
1111     info->memory       = isend[3];
1112     info->mallocs      = isend[4];
1113   } else if (flag == MAT_GLOBAL_MAX) {
1114     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1115     info->nz_used      = irecv[0];
1116     info->nz_allocated = irecv[1];
1117     info->nz_unneeded  = irecv[2];
1118     info->memory       = irecv[3];
1119     info->mallocs      = irecv[4];
1120   } else if (flag == MAT_GLOBAL_SUM) {
1121     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1122     info->nz_used      = irecv[0];
1123     info->nz_allocated = irecv[1];
1124     info->nz_unneeded  = irecv[2];
1125     info->memory       = irecv[3];
1126     info->mallocs      = irecv[4];
1127   }
1128   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1129   info->fill_ratio_needed = 0;
1130   info->factor_mallocs    = 0;
1131   info->rows_global       = (double)matin->M;
1132   info->columns_global    = (double)matin->N;
1133   info->rows_local        = (double)matin->m;
1134   info->columns_local     = (double)matin->N;
1135 
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatSetOption_MPIAIJ"
1141 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1142 {
1143   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1144   PetscErrorCode ierr;
1145 
1146   PetscFunctionBegin;
1147   switch (op) {
1148   case MAT_NO_NEW_NONZERO_LOCATIONS:
1149   case MAT_YES_NEW_NONZERO_LOCATIONS:
1150   case MAT_COLUMNS_UNSORTED:
1151   case MAT_COLUMNS_SORTED:
1152   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1153   case MAT_KEEP_ZEROED_ROWS:
1154   case MAT_NEW_NONZERO_LOCATION_ERR:
1155   case MAT_USE_INODES:
1156   case MAT_DO_NOT_USE_INODES:
1157   case MAT_IGNORE_ZERO_ENTRIES:
1158     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1159     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1160     break;
1161   case MAT_ROW_ORIENTED:
1162     a->roworiented = PETSC_TRUE;
1163     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1164     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1165     break;
1166   case MAT_ROWS_SORTED:
1167   case MAT_ROWS_UNSORTED:
1168   case MAT_YES_NEW_DIAGONALS:
1169     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1170     break;
1171   case MAT_COLUMN_ORIENTED:
1172     a->roworiented = PETSC_FALSE;
1173     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1174     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1175     break;
1176   case MAT_IGNORE_OFF_PROC_ENTRIES:
1177     a->donotstash = PETSC_TRUE;
1178     break;
1179   case MAT_NO_NEW_DIAGONALS:
1180     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1181   case MAT_SYMMETRIC:
1182   case MAT_STRUCTURALLY_SYMMETRIC:
1183   case MAT_HERMITIAN:
1184   case MAT_SYMMETRY_ETERNAL:
1185     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1186     break;
1187   case MAT_NOT_SYMMETRIC:
1188   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1189   case MAT_NOT_HERMITIAN:
1190   case MAT_NOT_SYMMETRY_ETERNAL:
1191     break;
1192   default:
1193     SETERRQ(PETSC_ERR_SUP,"unknown option");
1194   }
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatGetRow_MPIAIJ"
1200 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1201 {
1202   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1203   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1204   PetscErrorCode ierr;
1205   int          i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1206   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1207   int          *cmap,*idx_p;
1208 
1209   PetscFunctionBegin;
1210   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1211   mat->getrowactive = PETSC_TRUE;
1212 
1213   if (!mat->rowvalues && (idx || v)) {
1214     /*
1215         allocate enough space to hold information from the longest row.
1216     */
1217     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1218     int     max = 1,tmp;
1219     for (i=0; i<matin->m; i++) {
1220       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1221       if (max < tmp) { max = tmp; }
1222     }
1223     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1224     mat->rowindices = (int*)(mat->rowvalues + max);
1225   }
1226 
1227   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1228   lrow = row - rstart;
1229 
1230   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1231   if (!v)   {pvA = 0; pvB = 0;}
1232   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1233   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1234   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1235   nztot = nzA + nzB;
1236 
1237   cmap  = mat->garray;
1238   if (v  || idx) {
1239     if (nztot) {
1240       /* Sort by increasing column numbers, assuming A and B already sorted */
1241       int imark = -1;
1242       if (v) {
1243         *v = v_p = mat->rowvalues;
1244         for (i=0; i<nzB; i++) {
1245           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1246           else break;
1247         }
1248         imark = i;
1249         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1250         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1251       }
1252       if (idx) {
1253         *idx = idx_p = mat->rowindices;
1254         if (imark > -1) {
1255           for (i=0; i<imark; i++) {
1256             idx_p[i] = cmap[cworkB[i]];
1257           }
1258         } else {
1259           for (i=0; i<nzB; i++) {
1260             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1261             else break;
1262           }
1263           imark = i;
1264         }
1265         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1266         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1267       }
1268     } else {
1269       if (idx) *idx = 0;
1270       if (v)   *v   = 0;
1271     }
1272   }
1273   *nz = nztot;
1274   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1275   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNCT__
1280 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1281 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1282 {
1283   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1284 
1285   PetscFunctionBegin;
1286   if (aij->getrowactive == PETSC_FALSE) {
1287     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1288   }
1289   aij->getrowactive = PETSC_FALSE;
1290   PetscFunctionReturn(0);
1291 }
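/*
  Usage sketch for the get/restore pair above (illustrative only):

      int         row,nz,*cols;
      PetscScalar *vals;
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);

  Only locally owned rows may be requested, cols/vals are read-only work arrays
  owned by the matrix, and only one row may be outstanding at a time (see the
  getrowactive checks above).
*/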
1292 
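/*
  Norms computed below (the reductions are over mat->comm):

      NORM_FROBENIUS : sqrt(sum_ij |a_ij|^2)   local sums combined with MPI_SUM
      NORM_1         : max_j sum_i |a_ij|      largest column sum
      NORM_INFINITY  : max_i sum_j |a_ij|      largest row sum

  The 2-norm is not supported in parallel.
*/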
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatNorm_MPIAIJ"
1295 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1296 {
1297   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1298   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1299   PetscErrorCode ierr;
1300   int          i,j,cstart = aij->cstart;
1301   PetscReal    sum = 0.0;
1302   PetscScalar  *v;
1303 
1304   PetscFunctionBegin;
1305   if (aij->size == 1) {
1306     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1307   } else {
1308     if (type == NORM_FROBENIUS) {
1309       v = amat->a;
1310       for (i=0; i<amat->nz; i++) {
1311 #if defined(PETSC_USE_COMPLEX)
1312         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1313 #else
1314         sum += (*v)*(*v); v++;
1315 #endif
1316       }
1317       v = bmat->a;
1318       for (i=0; i<bmat->nz; i++) {
1319 #if defined(PETSC_USE_COMPLEX)
1320         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1321 #else
1322         sum += (*v)*(*v); v++;
1323 #endif
1324       }
1325       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1326       *norm = sqrt(*norm);
1327     } else if (type == NORM_1) { /* max column norm */
1328       PetscReal *tmp,*tmp2;
1329       int    *jj,*garray = aij->garray;
1330       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1331       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1332       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1333       *norm = 0.0;
1334       v = amat->a; jj = amat->j;
1335       for (j=0; j<amat->nz; j++) {
1336         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1337       }
1338       v = bmat->a; jj = bmat->j;
1339       for (j=0; j<bmat->nz; j++) {
1340         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1341       }
1342       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1343       for (j=0; j<mat->N; j++) {
1344         if (tmp2[j] > *norm) *norm = tmp2[j];
1345       }
1346       ierr = PetscFree(tmp);CHKERRQ(ierr);
1347       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1348     } else if (type == NORM_INFINITY) { /* max row norm */
1349       PetscReal ntemp = 0.0;
1350       for (j=0; j<aij->A->m; j++) {
1351         v = amat->a + amat->i[j];
1352         sum = 0.0;
1353         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1354           sum += PetscAbsScalar(*v); v++;
1355         }
1356         v = bmat->a + bmat->i[j];
1357         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1358           sum += PetscAbsScalar(*v); v++;
1359         }
1360         if (sum > ntemp) ntemp = sum;
1361       }
1362       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1363     } else {
1364       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1365     }
1366   }
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatTranspose_MPIAIJ"
1372 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1373 {
1374   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1375   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1376   PetscErrorCode ierr;
1377   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1378   Mat          B;
1379   PetscScalar  *array;
1380 
1381   PetscFunctionBegin;
1382   if (!matout && M != N) {
1383     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1384   }
1385 
1386   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1387   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1388   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1389 
1390   /* copy over the A part */
1391   Aloc = (Mat_SeqAIJ*)a->A->data;
1392   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1393   row = a->rstart;
1394   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1395   for (i=0; i<m; i++) {
1396     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1397     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1398   }
1399   aj = Aloc->j;
1400   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1401 
1402   /* copy over the B part */
1403   Aloc = (Mat_SeqAIJ*)a->B->data;
1404   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1405   row  = a->rstart;
1406   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1407   ct   = cols;
1408   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1409   for (i=0; i<m; i++) {
1410     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1411     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1412   }
1413   ierr = PetscFree(ct);CHKERRQ(ierr);
1414   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1415   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1416   if (matout) {
1417     *matout = B;
1418   } else {
1419     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1420   }
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1426 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1427 {
1428   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1429   Mat        a = aij->A,b = aij->B;
1430   PetscErrorCode ierr;
1431   int        s1,s2,s3;
1432 
1433   PetscFunctionBegin;
1434   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1435   if (rr) {
1436     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1437     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1438     /* Overlap communication with computation. */
1439     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1440   }
1441   if (ll) {
1442     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1443     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1444     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1445   }
1446   /* scale  the diagonal block */
1447   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1448 
1449   if (rr) {
1450     /* Do a scatter end and then right scale the off-diagonal block */
1451     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1452     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1453   }
1454 
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1461 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1462 {
1463   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1464   PetscErrorCode ierr;
1465 
1466   PetscFunctionBegin;
1467   if (!a->rank) {
1468     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1469   }
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1475 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1476 {
1477   PetscFunctionBegin;
1478   *bs = 1;
1479   PetscFunctionReturn(0);
1480 }
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1483 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1484 {
1485   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1486   PetscErrorCode ierr;
1487 
1488   PetscFunctionBegin;
1489   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatEqual_MPIAIJ"
1495 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1496 {
1497   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1498   Mat        a,b,c,d;
1499   PetscTruth flg;
1500   PetscErrorCode ierr;
1501 
1502   PetscFunctionBegin;
1503   a = matA->A; b = matA->B;
1504   c = matB->A; d = matB->B;
1505 
1506   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1507   if (flg == PETSC_TRUE) {
1508     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1509   }
1510   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatCopy_MPIAIJ"
1516 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1517 {
1518   PetscErrorCode ierr;
1519   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1520   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1521 
1522   PetscFunctionBegin;
1523   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1524   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1525     /* because of the column compression in the off-processor part of the matrix a->B,
1526        the number of columns in a->B and b->B may be different; hence we cannot call
1527        MatCopy() directly on the two parts. If need be, we could provide a more
1528        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1529        and then copying the submatrices */
1530     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1531   } else {
1532     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1533     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1534   }
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1540 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1541 {
1542   PetscErrorCode ierr;
1543 
1544   PetscFunctionBegin;
1545   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #include "petscblaslapack.h"
1550 #undef __FUNCT__
1551 #define __FUNCT__ "MatAXPY_MPIAIJ"
1552 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1553 {
1554   PetscErrorCode ierr;
1555   int            i;
1556   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1557   PetscBLASInt   bnz,one=1;
1558   Mat_SeqAIJ     *x,*y;
1559 
1560   PetscFunctionBegin;
1561   if (str == SAME_NONZERO_PATTERN) {
1562     x = (Mat_SeqAIJ *)xx->A->data;
1563     y = (Mat_SeqAIJ *)yy->A->data;
1564     bnz = (PetscBLASInt)x->nz;
1565     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1566     x = (Mat_SeqAIJ *)xx->B->data;
1567     y = (Mat_SeqAIJ *)yy->B->data;
1568     bnz = (PetscBLASInt)x->nz;
1569     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1570   } else if (str == SUBSET_NONZERO_PATTERN) {
1571     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1572 
1573     x = (Mat_SeqAIJ *)xx->B->data;
1574     y = (Mat_SeqAIJ *)yy->B->data;
1575     if (y->xtoy && y->XtoY != xx->B) {
1576       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1577       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1578     }
1579     if (!y->xtoy) { /* get xtoy */
1580       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1581       y->XtoY = xx->B;
1582     }
1583     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1584   } else {
1585     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 /* -------------------------------------------------------------------*/
1591 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1592        MatGetRow_MPIAIJ,
1593        MatRestoreRow_MPIAIJ,
1594        MatMult_MPIAIJ,
1595 /* 4*/ MatMultAdd_MPIAIJ,
1596        MatMultTranspose_MPIAIJ,
1597        MatMultTransposeAdd_MPIAIJ,
1598        0,
1599        0,
1600        0,
1601 /*10*/ 0,
1602        0,
1603        0,
1604        MatRelax_MPIAIJ,
1605        MatTranspose_MPIAIJ,
1606 /*15*/ MatGetInfo_MPIAIJ,
1607        MatEqual_MPIAIJ,
1608        MatGetDiagonal_MPIAIJ,
1609        MatDiagonalScale_MPIAIJ,
1610        MatNorm_MPIAIJ,
1611 /*20*/ MatAssemblyBegin_MPIAIJ,
1612        MatAssemblyEnd_MPIAIJ,
1613        0,
1614        MatSetOption_MPIAIJ,
1615        MatZeroEntries_MPIAIJ,
1616 /*25*/ MatZeroRows_MPIAIJ,
1617 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1618        MatLUFactorSymbolic_MPIAIJ_TFS,
1619 #else
1620        0,
1621 #endif
1622        0,
1623        0,
1624        0,
1625 /*30*/ MatSetUpPreallocation_MPIAIJ,
1626        0,
1627        0,
1628        0,
1629        0,
1630 /*35*/ MatDuplicate_MPIAIJ,
1631        0,
1632        0,
1633        0,
1634        0,
1635 /*40*/ MatAXPY_MPIAIJ,
1636        MatGetSubMatrices_MPIAIJ,
1637        MatIncreaseOverlap_MPIAIJ,
1638        MatGetValues_MPIAIJ,
1639        MatCopy_MPIAIJ,
1640 /*45*/ MatPrintHelp_MPIAIJ,
1641        MatScale_MPIAIJ,
1642        0,
1643        0,
1644        0,
1645 /*50*/ MatGetBlockSize_MPIAIJ,
1646        0,
1647        0,
1648        0,
1649        0,
1650 /*55*/ MatFDColoringCreate_MPIAIJ,
1651        0,
1652        MatSetUnfactored_MPIAIJ,
1653        0,
1654        0,
1655 /*60*/ MatGetSubMatrix_MPIAIJ,
1656        MatDestroy_MPIAIJ,
1657        MatView_MPIAIJ,
1658        MatGetPetscMaps_Petsc,
1659        0,
1660 /*65*/ 0,
1661        0,
1662        0,
1663        0,
1664        0,
1665 /*70*/ 0,
1666        0,
1667        MatSetColoring_MPIAIJ,
1668        MatSetValuesAdic_MPIAIJ,
1669        MatSetValuesAdifor_MPIAIJ,
1670 /*75*/ 0,
1671        0,
1672        0,
1673        0,
1674        0,
1675 /*80*/ 0,
1676        0,
1677        0,
1678        0,
1679 /*84*/ MatLoad_MPIAIJ,
1680        0,
1681        0,
1682        0,
1683        0,
1684        0,
1685 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1686        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1687        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1688        MatPtAP_MPIAIJ_MPIAIJ,
1689        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1690 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1691        0,
1692        0,
1693        0};
1694 
1695 /* ----------------------------------------------------------------------------------------*/
1696 
1697 EXTERN_C_BEGIN
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1700 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1701 {
1702   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1703   PetscErrorCode ierr;
1704 
1705   PetscFunctionBegin;
1706   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1707   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1708   PetscFunctionReturn(0);
1709 }
1710 EXTERN_C_END
1711 
1712 EXTERN_C_BEGIN
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1715 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1716 {
1717   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1718   PetscErrorCode ierr;
1719 
1720   PetscFunctionBegin;
1721   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1722   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
1725 EXTERN_C_END
1726 
1727 #include "petscpc.h"
1728 EXTERN_C_BEGIN
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1731 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1732 {
1733   Mat_MPIAIJ   *b;
1734   PetscErrorCode ierr;
1735   int          i;
1736 
1737   PetscFunctionBegin;
1738   B->preallocated = PETSC_TRUE;
1739   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1740   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1741   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1742   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1743   if (d_nnz) {
1744     for (i=0; i<B->m; i++) {
1745       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1746     }
1747   }
1748   if (o_nnz) {
1749     for (i=0; i<B->m; i++) {
1750       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1751     }
1752   }
1753   b = (Mat_MPIAIJ*)B->data;
1754   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1755   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1756 
1757   PetscFunctionReturn(0);
1758 }
1759 EXTERN_C_END
1760 
1761 /*MC
1762    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1763 
1764    Options Database Keys:
1765 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
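
   For example (a sketch only; the communicator and sizes are illustrative), a code that
   calls MatSetFromOptions() can select this type at run time with -mat_type mpiaij:
.vb
     MatCreate(comm,m,n,M,N,&A);
     MatSetFromOptions(A);
.ve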
1766 
1767   Level: beginner
1768 
1769 .seealso: MatCreateMPIAIJ
1770 M*/
1771 
1772 EXTERN_C_BEGIN
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatCreate_MPIAIJ"
1775 PetscErrorCode MatCreate_MPIAIJ(Mat B)
1776 {
1777   Mat_MPIAIJ *b;
1778   PetscErrorCode ierr;
1779   int        i,size;
1780 
1781   PetscFunctionBegin;
1782   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1783 
1784   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1785   B->data         = (void*)b;
1786   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1787   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1788   B->factor       = 0;
1789   B->assembled    = PETSC_FALSE;
1790   B->mapping      = 0;
1791 
1792   B->insertmode      = NOT_SET_VALUES;
1793   b->size            = size;
1794   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1795 
1796   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1797   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1798 
1799   /* the information in the maps duplicates the information computed below, eventually
1800      we should remove the duplicate information that is not contained in the maps */
1801   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1802   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1803 
1804   /* build local table of row and column ownerships */
1805   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1806   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1807   b->cowners = b->rowners + b->size + 2;
1808   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1809   b->rowners[0] = 0;
1810   for (i=2; i<=b->size; i++) {
1811     b->rowners[i] += b->rowners[i-1];
1812   }
1813   b->rstart = b->rowners[b->rank];
1814   b->rend   = b->rowners[b->rank+1];
1815   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1816   b->cowners[0] = 0;
1817   for (i=2; i<=b->size; i++) {
1818     b->cowners[i] += b->cowners[i-1];
1819   }
1820   b->cstart = b->cowners[b->rank];
1821   b->cend   = b->cowners[b->rank+1];
1822 
1823   /* build cache for off-processor entries generated during assembly */
1824   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1825   b->donotstash  = PETSC_FALSE;
1826   b->colmap      = 0;
1827   b->garray      = 0;
1828   b->roworiented = PETSC_TRUE;
1829 
1830   /* stuff used for matrix vector multiply */
1831   b->lvec      = PETSC_NULL;
1832   b->Mvctx     = PETSC_NULL;
1833 
1834   /* stuff for MatGetRow() */
1835   b->rowindices   = 0;
1836   b->rowvalues    = 0;
1837   b->getrowactive = PETSC_FALSE;
1838 
1839   /* Explicitly create 2 MATSEQAIJ matrices. */
1840   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1841   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1842   PetscLogObjectParent(B,b->A);
1843   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1844   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1845   PetscLogObjectParent(B,b->B);
1846 
1847   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1848                                      "MatStoreValues_MPIAIJ",
1849                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1850   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1851                                      "MatRetrieveValues_MPIAIJ",
1852                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1853   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1854 				     "MatGetDiagonalBlock_MPIAIJ",
1855                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1856   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1857 				     "MatIsTranspose_MPIAIJ",
1858 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
1859   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1860 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1861 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
1862   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1863 				     "MatDiagonalScaleLocal_MPIAIJ",
1864 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
1865   PetscFunctionReturn(0);
1866 }
1867 EXTERN_C_END
1868 
1869 /*MC
1870    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1871 
1872    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1873    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1874   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1875   for communicators controlling multiple processes.  It is recommended that you call both of
1876   the above preallocation routines for simplicity.
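
   For example (a sketch; the communicator, sizes, and nonzero counts are illustrative
   only), the following preallocation sequence works regardless of the number of processes:
.vb
     MatCreate(comm,m,n,M,N,&A);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,5,PETSC_NULL);
     MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
.ve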
1877 
1878    Options Database Keys:
1879 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1880 
1881   Level: beginner
1882 
1883 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1884 M*/
1885 
1886 EXTERN_C_BEGIN
1887 #undef __FUNCT__
1888 #define __FUNCT__ "MatCreate_AIJ"
1889 PetscErrorCode MatCreate_AIJ(Mat A)
1890 {
1891   PetscErrorCode ierr;
1892   int size;
1893 
1894   PetscFunctionBegin;
1895   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1896   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1897   if (size == 1) {
1898     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1899   } else {
1900     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1901   }
1902   PetscFunctionReturn(0);
1903 }
1904 EXTERN_C_END
1905 
1906 #undef __FUNCT__
1907 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1908 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1909 {
1910   Mat        mat;
1911   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1912   PetscErrorCode ierr;
1913 
1914   PetscFunctionBegin;
1915   *newmat       = 0;
1916   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1917   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1918   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1919   a    = (Mat_MPIAIJ*)mat->data;
1920 
1921   mat->factor       = matin->factor;
1922   mat->assembled    = PETSC_TRUE;
1923   mat->insertmode   = NOT_SET_VALUES;
1924   mat->preallocated = PETSC_TRUE;
1925 
1926   a->rstart       = oldmat->rstart;
1927   a->rend         = oldmat->rend;
1928   a->cstart       = oldmat->cstart;
1929   a->cend         = oldmat->cend;
1930   a->size         = oldmat->size;
1931   a->rank         = oldmat->rank;
1932   a->donotstash   = oldmat->donotstash;
1933   a->roworiented  = oldmat->roworiented;
1934   a->rowindices   = 0;
1935   a->rowvalues    = 0;
1936   a->getrowactive = PETSC_FALSE;
1937 
1938   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1939   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1940   if (oldmat->colmap) {
1941 #if defined (PETSC_USE_CTABLE)
1942     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1943 #else
1944     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1945     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1946     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1947 #endif
1948   } else a->colmap = 0;
1949   if (oldmat->garray) {
1950     int len;
1951     len  = oldmat->B->n;
1952     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1953     PetscLogObjectMemory(mat,len*sizeof(int));
1954     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1955   } else a->garray = 0;
1956 
1957   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1958   PetscLogObjectParent(mat,a->lvec);
1959   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1960   PetscLogObjectParent(mat,a->Mvctx);
1961   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1962   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1963   PetscLogObjectParent(mat,a->A);
1964   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1965   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1966   PetscLogObjectParent(mat,a->B);
1967   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1968   *newmat = mat;
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #include "petscsys.h"
1973 
1974 #undef __FUNCT__
1975 #define __FUNCT__ "MatLoad_MPIAIJ"
1976 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1977 {
1978   Mat            A;
1979   PetscScalar    *vals,*svals;
1980   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1981   MPI_Status     status;
1982   PetscErrorCode ierr;
1983   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1984   int            i,nz,j,rstart,rend,fd;
1985   int            header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1986   int            *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1987   int            cend,cstart,n;
1988 
1989   PetscFunctionBegin;
1990   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1991   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1992   if (!rank) {
1993     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1994     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1995     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1996     if (header[3] < 0) {
1997       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1998     }
1999   }
2000 
2001   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2002   M = header[1]; N = header[2];
2003   /* determine ownership of all rows */
2004   m = M/size + ((M % size) > rank);
2005   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2006   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2007   rowners[0] = 0;
2008   for (i=2; i<=size; i++) {
2009     rowners[i] += rowners[i-1];
2010   }
2011   rstart = rowners[rank];
2012   rend   = rowners[rank+1];
2013 
2014   /* distribute row lengths to all processors */
2015   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
2016   offlens = ourlens + (rend-rstart);
2017   if (!rank) {
2018     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
2019     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2020     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2021     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2022     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2023     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2024   } else {
2025     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2026   }
2027 
2028   if (!rank) {
2029     /* calculate the number of nonzeros on each processor */
2030     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2031     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2032     for (i=0; i<size; i++) {
2033       for (j=rowners[i]; j< rowners[i+1]; j++) {
2034         procsnz[i] += rowlengths[j];
2035       }
2036     }
2037     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2038 
2039     /* determine max buffer needed and allocate it */
2040     maxnz = 0;
2041     for (i=0; i<size; i++) {
2042       maxnz = PetscMax(maxnz,procsnz[i]);
2043     }
2044     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2045 
2046     /* read in my part of the matrix column indices  */
2047     nz   = procsnz[0];
2048     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2049     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2050 
2051     /* read in everyone else's and ship it off */
2052     for (i=1; i<size; i++) {
2053       nz   = procsnz[i];
2054       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2055       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2056     }
2057     ierr = PetscFree(cols);CHKERRQ(ierr);
2058   } else {
2059     /* determine buffer space needed for message */
2060     nz = 0;
2061     for (i=0; i<m; i++) {
2062       nz += ourlens[i];
2063     }
2064     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2065 
2066     /* receive message of column indices*/
2067     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2068     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2069     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2070   }
2071 
2072   /* determine column ownership if matrix is not square */
2073   if (N != M) {
2074     n      = N/size + ((N % size) > rank);
2075     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2076     cstart = cend - n;
2077   } else {
2078     cstart = rstart;
2079     cend   = rend;
2080     n      = cend - cstart;
2081   }
2082 
2083   /* loop over local rows, determining number of off diagonal entries */
2084   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2085   jj = 0;
2086   for (i=0; i<m; i++) {
2087     for (j=0; j<ourlens[i]; j++) {
2088       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2089       jj++;
2090     }
2091   }
2092 
2093   /* create our matrix */
2094   for (i=0; i<m; i++) {
2095     ourlens[i] -= offlens[i];
2096   }
2097   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2098   ierr = MatSetType(A,type);CHKERRQ(ierr);
2099   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2100 
2101   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2102   for (i=0; i<m; i++) {
2103     ourlens[i] += offlens[i];
2104   }
2105 
2106   if (!rank) {
2107     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2108 
2109     /* read in my part of the matrix numerical values  */
2110     nz   = procsnz[0];
2111     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2112 
2113     /* insert into matrix */
2114     jj      = rstart;
2115     smycols = mycols;
2116     svals   = vals;
2117     for (i=0; i<m; i++) {
2118       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2119       smycols += ourlens[i];
2120       svals   += ourlens[i];
2121       jj++;
2122     }
2123 
2124     /* read in other processors and ship out */
2125     for (i=1; i<size; i++) {
2126       nz   = procsnz[i];
2127       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2128       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2129     }
2130     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2131   } else {
2132     /* receive numeric values */
2133     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2134 
2135     /* receive message of values*/
2136     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2137     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2138     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2139 
2140     /* insert into matrix */
2141     jj      = rstart;
2142     smycols = mycols;
2143     svals   = vals;
2144     for (i=0; i<m; i++) {
2145       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2146       smycols += ourlens[i];
2147       svals   += ourlens[i];
2148       jj++;
2149     }
2150   }
2151   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2152   ierr = PetscFree(vals);CHKERRQ(ierr);
2153   ierr = PetscFree(mycols);CHKERRQ(ierr);
2154   ierr = PetscFree(rowners);CHKERRQ(ierr);
2155 
2156   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2157   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   *newmat = A;
2159   PetscFunctionReturn(0);
2160 }
2161 
2162 #undef __FUNCT__
2163 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2164 /*
2165     Not great since it makes two copies of the submatrix: first a SeqAIJ copy
2166   on each process, and then the end result formed by concatenating the local matrices.
2167   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2168 */
2169 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2170 {
2171   PetscErrorCode ierr;
2172   PetscMPIInt    rank,size;
2173   int            i,m,n,rstart,row,rend,nz,*cwork,j;
2174   int            *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2175   Mat            *local,M,Mreuse;
2176   PetscScalar    *vwork,*aa;
2177   MPI_Comm       comm = mat->comm;
2178   Mat_SeqAIJ     *aij;
2179 
2180 
2181   PetscFunctionBegin;
2182   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2183   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2184 
2185   if (call ==  MAT_REUSE_MATRIX) {
2186     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2187     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2188     local = &Mreuse;
2189     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2190   } else {
2191     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2192     Mreuse = *local;
2193     ierr   = PetscFree(local);CHKERRQ(ierr);
2194   }
2195 
2196   /*
2197       m - number of local rows
2198       n - number of columns (same on all processors)
2199       rstart - first row in new global matrix generated
2200   */
2201   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2202   if (call == MAT_INITIAL_MATRIX) {
2203     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2204     ii  = aij->i;
2205     jj  = aij->j;
2206 
2207     /*
2208         Determine the number of non-zeros in the diagonal and off-diagonal
2209         portions of the matrix in order to do correct preallocation
2210     */
2211 
2212     /* first get start and end of "diagonal" columns */
2213     if (csize == PETSC_DECIDE) {
2214       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2215       if (mglobal == n) { /* square matrix */
2216 	nlocal = m;
2217       } else {
2218         nlocal = n/size + ((n % size) > rank);
2219       }
2220     } else {
2221       nlocal = csize;
2222     }
2223     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2224     rstart = rend - nlocal;
2225     if (rank == size - 1 && rend != n) {
2226       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2227     }
2228 
2229     /* next, compute all the lengths */
2230     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2231     olens = dlens + m;
2232     for (i=0; i<m; i++) {
2233       jend = ii[i+1] - ii[i];
2234       olen = 0;
2235       dlen = 0;
2236       for (j=0; j<jend; j++) {
2237         if (*jj < rstart || *jj >= rend) olen++;
2238         else dlen++;
2239         jj++;
2240       }
2241       olens[i] = olen;
2242       dlens[i] = dlen;
2243     }
2244     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2245     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2246     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2247     ierr = PetscFree(dlens);CHKERRQ(ierr);
2248   } else {
2249     int ml,nl;
2250 
2251     M = *newmat;
2252     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2253     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2254     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2255     /*
2256          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2257        rather than the slower MatSetValues().
2258     */
2259     M->was_assembled = PETSC_TRUE;
2260     M->assembled     = PETSC_FALSE;
2261   }
2262   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2263   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2264   ii  = aij->i;
2265   jj  = aij->j;
2266   aa  = aij->a;
2267   for (i=0; i<m; i++) {
2268     row   = rstart + i;
2269     nz    = ii[i+1] - ii[i];
2270     cwork = jj;     jj += nz;
2271     vwork = aa;     aa += nz;
2272     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2273   }
2274 
2275   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2276   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2277   *newmat = M;
2278 
2279   /* save submatrix used in processor for next request */
2280   if (call ==  MAT_INITIAL_MATRIX) {
2281     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2282     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2283   }
2284 
2285   PetscFunctionReturn(0);
2286 }
2287 
2288 #undef __FUNCT__
2289 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2290 /*@C
2291    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2292    (the default parallel PETSc format).  For good matrix assembly performance
2293    the user should preallocate the matrix storage by setting the parameters
2294    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2295    performance can be increased by more than a factor of 50.
2296 
2297    Collective on MPI_Comm
2298 
2299    Input Parameters:
2300 +  B - the matrix
2301 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2302            (same value is used for all local rows)
2303 .  d_nnz - array containing the number of nonzeros in the various rows of the
2304            DIAGONAL portion of the local submatrix (possibly different for each row)
2305            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2306            The size of this array is equal to the number of local rows, i.e 'm'.
2307            You must leave room for the diagonal entry even if it is zero.
2308 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2309            submatrix (same value is used for all local rows).
2310 -  o_nnz - array containing the number of nonzeros in the various rows of the
2311            OFF-DIAGONAL portion of the local submatrix (possibly different for
2312            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2313            structure. The size of this array is equal to the number
2314            of local rows, i.e 'm'.
2315 
2316    The AIJ format (also called the Yale sparse matrix format or
2317    compressed row storage) is fully compatible with standard Fortran 77
2318    storage.  That is, the stored row and column indices can begin at
2319    either one (as in Fortran) or zero.  See the users manual for details.
2320 
2321    The user MUST specify either the local or global matrix dimensions
2322    (possibly both).
2323 
2324    The parallel matrix is partitioned such that the first m0 rows belong to
2325    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2326    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2327 
2328    The DIAGONAL portion of the local submatrix of a processor can be defined
2329    as the submatrix which is obtained by extracting the part corresponding
2330    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2331    first row that belongs to the processor, and r2 is the last row belonging
2332    to this processor. This is a square mxm matrix. The remaining portion
2333    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2334 
2335    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2336 
2337    By default, this format uses inodes (identical nodes) when possible.
2338    We search for consecutive rows with the same nonzero structure, thereby
2339    reusing matrix information to achieve increased efficiency.
2340 
2341    Options Database Keys:
2342 +  -mat_aij_no_inode  - Do not use inodes
2343 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2344 -  -mat_aij_oneindex - Internally use indexing starting at 1
2345         rather than 0.  Note that when calling MatSetValues(),
2346         the user still MUST index entries starting at 0!
2347 
2348    Example usage:
2349 
2350    Consider the following 8x8 matrix with 34 non-zero values, that is
2351    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2352    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2353    as follows:
2354 
2355 .vb
2356             1  2  0  |  0  3  0  |  0  4
2357     Proc0   0  5  6  |  7  0  0  |  8  0
2358             9  0 10  | 11  0  0  | 12  0
2359     -------------------------------------
2360            13  0 14  | 15 16 17  |  0  0
2361     Proc1   0 18  0  | 19 20 21  |  0  0
2362             0  0  0  | 22 23  0  | 24  0
2363     -------------------------------------
2364     Proc2  25 26 27  |  0  0 28  | 29  0
2365            30  0  0  | 31 32 33  |  0 34
2366 .ve
2367 
2368    This can be represented as a collection of submatrices as:
2369 
2370 .vb
2371       A B C
2372       D E F
2373       G H I
2374 .ve
2375 
2376    Where the submatrices A,B,C are owned by proc0, D,E,F are
2377    owned by proc1, G,H,I are owned by proc2.
2378 
2379    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2380    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2381    The 'M','N' parameters are 8,8, and have the same values on all procs.
2382 
2383    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2384    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2385    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2386    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2387    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2388    matrix, and [DF] as another SeqAIJ matrix.
2389 
2390    When d_nz, o_nz parameters are specified, d_nz storage elements are
2391    allocated for every row of the local diagonal submatrix, and o_nz
2392    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2393    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2394    per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2395    In this case, the values of d_nz,o_nz are:
2396 .vb
2397      proc0 : d_nz = 2, o_nz = 2
2398      proc1 : d_nz = 3, o_nz = 2
2399      proc2 : d_nz = 1, o_nz = 4
2400 .ve
2401    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2402    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2403    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2404    34 values.
2405 
2406    When d_nnz, o_nnz parameters are specified, the storage is specified
2407    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2408    In the above case the values for d_nnz,o_nnz are:
2409 .vb
2410      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2411      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2412      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2413 .ve
2414    Here the space allocated is the sum of all the above values, i.e. 34, and
2415    hence the preallocation is perfect.
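
   A minimal call sequence is sketched below (the communicator, sizes, and nonzero
   counts are illustrative only, not taken from the example above):
.vb
     MatCreate(comm,m,n,M,N,&A);
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
.ve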
2416 
2417    Level: intermediate
2418 
2419 .keywords: matrix, aij, compressed row, sparse, parallel
2420 
2421 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2422 @*/
2423 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2424 {
2425   PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]);
2426 
2427   PetscFunctionBegin;
2428   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2429   if (f) {
2430     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2431   }
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 #undef __FUNCT__
2436 #define __FUNCT__ "MatCreateMPIAIJ"
2437 /*@C
2438    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2439    (the default parallel PETSc format).  For good matrix assembly performance
2440    the user should preallocate the matrix storage by setting the parameters
2441    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2442    performance can be increased by more than a factor of 50.
2443 
2444    Collective on MPI_Comm
2445 
2446    Input Parameters:
2447 +  comm - MPI communicator
2448 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2449            This value should be the same as the local size used in creating the
2450            y vector for the matrix-vector product y = Ax.
2451 .  n - This value should be the same as the local size used in creating the
2452        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2453        calculated if N is given) For square matrices n is almost always m.
2454 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2455 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2456 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2457            (same value is used for all local rows)
2458 .  d_nnz - array containing the number of nonzeros in the various rows of the
2459            DIAGONAL portion of the local submatrix (possibly different for each row)
2460            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2461            The size of this array is equal to the number of local rows, i.e 'm'.
2462            You must leave room for the diagonal entry even if it is zero.
2463 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2464            submatrix (same value is used for all local rows).
2465 -  o_nnz - array containing the number of nonzeros in the various rows of the
2466            OFF-DIAGONAL portion of the local submatrix (possibly different for
2467            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2468            structure. The size of this array is equal to the number
2469            of local rows, i.e 'm'.
2470 
2471    Output Parameter:
2472 .  A - the matrix
2473 
2474    Notes:
2475    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2476    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2477    storage requirements for this matrix.
2478 
2479    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2480    processor then it must be used on all processors that share the object for
2481    that argument.
2482 
2483    The AIJ format (also called the Yale sparse matrix format or
2484    compressed row storage) is fully compatible with standard Fortran 77
2485    storage.  That is, the stored row and column indices can begin at
2486    either one (as in Fortran) or zero.  See the users manual for details.
2487 
2488    The user MUST specify either the local or global matrix dimensions
2489    (possibly both).
2490 
2491    The parallel matrix is partitioned such that the first m0 rows belong to
2492    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2493    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2494 
2495    The DIAGONAL portion of the local submatrix of a processor can be defined
2496    as the submatrix which is obtained by extracting the part corresponding
2497    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2498    first row that belongs to the processor, and r2 is the last row belonging
2499    to this processor. This is a square mxm matrix. The remaining portion
2500    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2501 
2502    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2503 
2504    When calling this routine with a single process communicator, a matrix of
2505    type MATSEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
2506    type of communicator, use the construction mechanism:
2507      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2508 
2509    By default, this format uses inodes (identical nodes) when possible.
2510    We search for consecutive rows with the same nonzero structure, thereby
2511    reusing matrix information to achieve increased efficiency.
2512 
2513    Options Database Keys:
2514 +  -mat_aij_no_inode  - Do not use inodes
2515 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2516 -  -mat_aij_oneindex - Internally use indexing starting at 1
2517         rather than 0.  Note that when calling MatSetValues(),
2518         the user still MUST index entries starting at 0!
2519 
2520 
2521    Example usage:
2522 
2523    Consider the following 8x8 matrix with 34 non-zero values, that is
2524    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2525    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2526    as follows:
2527 
2528 .vb
2529             1  2  0  |  0  3  0  |  0  4
2530     Proc0   0  5  6  |  7  0  0  |  8  0
2531             9  0 10  | 11  0  0  | 12  0
2532     -------------------------------------
2533            13  0 14  | 15 16 17  |  0  0
2534     Proc1   0 18  0  | 19 20 21  |  0  0
2535             0  0  0  | 22 23  0  | 24  0
2536     -------------------------------------
2537     Proc2  25 26 27  |  0  0 28  | 29  0
2538            30  0  0  | 31 32 33  |  0 34
2539 .ve
2540 
2541    This can be represented as a collection of submatrices as:
2542 
2543 .vb
2544       A B C
2545       D E F
2546       G H I
2547 .ve
2548 
2549    Where the submatrices A,B,C are owned by proc0, D,E,F are
2550    owned by proc1, G,H,I are owned by proc2.
2551 
2552    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2553    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2554    The 'M','N' parameters are 8,8, and have the same values on all procs.
2555 
2556    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2557    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2558    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2559    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2560    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2561    matrix, and [DF] as another SeqAIJ matrix.
2562 
2563    When d_nz, o_nz parameters are specified, d_nz storage elements are
2564    allocated for every row of the local diagonal submatrix, and o_nz
2565    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2566    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2567    per local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2568    In this case, the values of d_nz,o_nz are:
2569 .vb
2570      proc0 : d_nz = 2, o_nz = 2
2571      proc1 : d_nz = 3, o_nz = 2
2572      proc2 : d_nz = 1, o_nz = 4
2573 .ve
2574    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2575    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2576    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2577    34 values.
2578 
2579    When d_nnz, o_nnz parameters are specified, the storage is specified
2580    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2581    In the above case the values for d_nnz,o_nnz are:
2582 .vb
2583      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2584      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2585      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2586 .ve
2587    Here the space allocated is the sum of all the above values, i.e. 34, and
2588    hence the preallocation is perfect.
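
   A minimal call sketch (the communicator, sizes, and nonzero counts are illustrative only):
.vb
     Mat A;
     MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve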
2589 
2590    Level: intermediate
2591 
2592 .keywords: matrix, aij, compressed row, sparse, parallel
2593 
2594 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2595 @*/
2596 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2597 {
2598   PetscErrorCode ierr;
2599   int size;
2600 
2601   PetscFunctionBegin;
2602   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2603   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2604   if (size > 1) {
2605     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2606     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2607   } else {
2608     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2609     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2610   }
2611   PetscFunctionReturn(0);
2612 }
2613 
2614 #undef __FUNCT__
2615 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2616 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2617 {
2618   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2619   PetscFunctionBegin;
2620   *Ad     = a->A;
2621   *Ao     = a->B;
2622   *colmap = a->garray;
2623   PetscFunctionReturn(0);
2624 }
2625 
2626 #undef __FUNCT__
2627 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2628 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2629 {
2630   PetscErrorCode ierr;
2631   int        i;
2632   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2633 
2634   PetscFunctionBegin;
2635   if (coloring->ctype == IS_COLORING_LOCAL) {
2636     ISColoringValue *allcolors,*colors;
2637     ISColoring      ocoloring;
2638 
2639     /* set coloring for diagonal portion */
2640     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2641 
2642     /* set coloring for off-diagonal portion */
2643     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2644     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2645     for (i=0; i<a->B->n; i++) {
2646       colors[i] = allcolors[a->garray[i]];
2647     }
2648     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2649     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2650     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2651     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2652   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2653     ISColoringValue *colors;
2654     int             *larray;
2655     ISColoring      ocoloring;
2656 
2657     /* set coloring for diagonal portion */
2658     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2659     for (i=0; i<a->A->n; i++) {
2660       larray[i] = i + a->cstart;
2661     }
2662     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2663     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2664     for (i=0; i<a->A->n; i++) {
2665       colors[i] = coloring->colors[larray[i]];
2666     }
2667     ierr = PetscFree(larray);CHKERRQ(ierr);
2668     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2669     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2670     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2671 
2672     /* set coloring for off-diagonal portion */
2673     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2674     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2675     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2676     for (i=0; i<a->B->n; i++) {
2677       colors[i] = coloring->colors[larray[i]];
2678     }
2679     ierr = PetscFree(larray);CHKERRQ(ierr);
2680     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2681     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2682     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2683   } else {
2684     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2685   }
2686 
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2692 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2693 {
2694   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2695   PetscErrorCode ierr;
2696 
2697   PetscFunctionBegin;
2698   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2699   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2700   PetscFunctionReturn(0);
2701 }
2702 
2703 #undef __FUNCT__
2704 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2705 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2706 {
2707   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2708   PetscErrorCode ierr;
2709 
2710   PetscFunctionBegin;
2711   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2712   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatMerge"
2718 /*@C
2719       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2720                  matrices from each processor
2721 
2722     Collective on MPI_Comm
2723 
2724    Input Parameters:
2725 +    comm - the communicator the parallel matrix will live on
2726 .    inmat - the input sequential matrix (one per process)
2727 .    n - number of local columns (or PETSC_DECIDE)
2728 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2729 
2730    Output Parameter:
2731 .    outmat - the parallel matrix generated
2732 
2733     Level: advanced
2734 
2735    Notes: The number of columns of the matrix in EACH processor MUST be the same.
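
   A minimal call sketch (names are illustrative; inmat is an existing sequential matrix
   on each process, and note that this routine destroys inmat):
.vb
     Mat C;
     MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);
.ve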
2736 
2737 @*/
2738 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2739 {
2740   PetscErrorCode ierr;
2741   int               m,N,i,rstart,nnz,I,*dnz,*onz;
2742   const int         *indx;
2743   const PetscScalar *values;
2744   PetscMap          columnmap,rowmap;
2745 
2746   PetscFunctionBegin;
2747 
2748   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2749   if (scall == MAT_INITIAL_MATRIX){
2750     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2751     if (n == PETSC_DECIDE){
2752       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2753       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2754       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2755       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2756       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2757     }
2758 
2759     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2760     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2761     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2762     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2763     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2764 
2765     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2766     for (i=0;i<m;i++) {
2767       ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2768       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2769       ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2770     }
2771     /* This routine will ONLY return MPIAIJ type matrix */
2772     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2773     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2774     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2775     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2776 
2777   } else if (scall == MAT_REUSE_MATRIX){
2778     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2779   } else {
2780     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2781   }
2782 
2783   for (i=0;i<m;i++) {
2784     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2785     I    = i + rstart;
2786     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2787     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2788   }
2789   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2790   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2791   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2792 
2793   PetscFunctionReturn(0);
2794 }
2795 
2796 #undef __FUNCT__
2797 #define __FUNCT__ "MatFileSplit"
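/*
   Writes the locally owned rows of A, copied into a sequential AIJ matrix, to
   the binary file "<outfile>.<rank>", one file per process.
*/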
2798 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2799 {
2800   PetscErrorCode    ierr;
2801   PetscMPIInt       rank;
2802   int               m,N,i,rstart,nnz;
2803   size_t            len;
2804   const int         *indx;
2805   PetscViewer       out;
2806   char              *name;
2807   Mat               B;
2808   const PetscScalar *values;
2809 
2810   PetscFunctionBegin;
2811 
2812   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2813   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2814   /* Should this be the type of the diagonal block of A? */
2815   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2816   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2817   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2818   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2819   for (i=0;i<m;i++) {
2820     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2821     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2822     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2823   }
2824   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2825   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2826 
2827   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2828   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2829   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2830   sprintf(name,"%s.%d",outfile,rank);
2831   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2832   ierr = PetscFree(name);CHKERRQ(ierr);
2833   ierr = MatView(B,out);CHKERRQ(ierr);
2834   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2835   ierr = MatDestroy(B);CHKERRQ(ierr);
2836   PetscFunctionReturn(0);
2837 }
2838 
2839 typedef struct { /* used by MatMerge_SeqsToMPI for reusing the merged matrix */
2840   PetscMap  rowmap;
2841   int       nrecv,nsend,*id_r,*bi,*bj,**ijbuf_r;
2842   int       *len_sra; /* array of length 2*size; len_sra[i] counts matrix values sent to
2843                          proc i, len_sra[size+i] counts those in the i-th received message */
2844   MatScalar *abuf_s;
2845 } Mat_Merge_SeqsToMPI;
2846 
2847 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2848 #undef __FUNCT__
2849 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
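/*
   Frees the Mat_Merge_SeqsToMPI support structure attached to the matrix by
   MatMerge_SeqsToMPI(), then calls the regular MPIAIJ destroy routine.
*/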
2850 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2851 {
2852   PetscErrorCode ierr;
2853   Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)A->spptr;
2854 
2855   PetscFunctionBegin;
2856   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2857   ierr = PetscFree(merge->abuf_s);CHKERRQ(ierr);
2858   ierr = PetscFree(merge->len_sra);CHKERRQ(ierr);
2859   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2860   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2861   ierr = PetscFree(merge->ijbuf_r);CHKERRQ(ierr);
2862   ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2863   ierr = PetscFree(merge);CHKERRQ(ierr);
2864 
2865   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2866   PetscFunctionReturn(0);
2867 }
2868 
2869 #include "src/mat/utils/freespace.h"
2870 #include "petscbt.h"
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatMerge_SeqsToMPI"
2873 /*@C
2874       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together sequential
2875                  matrices from each processor
2876 
2877     Collective on MPI_Comm
2878 
2879    Input Parameters:
2880 +    comm - the communicator the parallel matrix will live on
2881 .    seqmat - the input sequential matrix (one per process)
2882 .    m - number of local rows (or PETSC_DECIDE)
2883 .    n - number of local columns (or PETSC_DECIDE)
2884 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2885 
2886    Output Parameter:
2887 .    mpimat - the parallel matrix generated
2888 
2889     Level: advanced
2890 
2891    Notes: The dimensions of the sequential matrix on each processor
2892       MUST be the same.
2893 
2894 @*/
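/*
   A hedged usage sketch (hypothetical caller, not part of this file): every
   process assembles an M-by-N SeqAIJ matrix seqmat holding its additive
   contribution to the global matrix; the contributions are summed into the
   MPIAIJ matrix mpimat.  The saved ij-structure allows the same sparsity
   pattern to be refilled later with MAT_REUSE_MATRIX.

       Mat seqmat,mpimat;
       ...  assemble the M-by-N SeqAIJ matrix seqmat on PETSC_COMM_SELF  ...
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,
                                 MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
       ...  change the numerical values of seqmat (same nonzero pattern)  ...
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,
                                 MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
*/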
2895 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
2896 {
2897   PetscErrorCode    ierr;
2898   Mat               B_seq,B_mpi;
2899   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)seqmat->data;
2900   PetscMPIInt       size,rank;
2901   int               M=seqmat->m,N=seqmat->n,i,j,*owners,*ai=a->i,*aj=a->j;
2902   int               tag,taga,len,len_a = 0,*len_s,*len_sa,proc,*len_r,*len_ra;
2903   int               **ijbuf_r,*ijbuf_s,*nnz_ptr,k,anzi,*bj_i,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj;
2904   MPI_Request       *s_waits,*r_waits,*s_waitsa,*r_waitsa;
2905   MPI_Status        *status;
2906   MatScalar         *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i;
2907   FreeSpaceList     free_space=PETSC_NULL,current_space=PETSC_NULL;
2908   PetscBT           lnkbt;
2909   Mat_Merge_SeqsToMPI *merge;
2910 
2911   PetscFunctionBegin;
2912   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2913   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2914   /* ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] nnz(C): %d\n",rank,ai[M]); */
2915   if (scall == MAT_INITIAL_MATRIX){
2916     ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
2917   } else if (scall == MAT_REUSE_MATRIX){
2918     B_mpi = *mpimat;
2919     merge = (Mat_Merge_SeqsToMPI*)B_mpi->spptr;
2920     bi      = merge->bi;
2921     bj      = merge->bj;
2922     ijbuf_r = merge->ijbuf_r;
2923   } else {
2924     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2925   }
2926   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2927 
2928   /* determine the number of messages to send, their lengths */
2929   /*---------------------------------------------------------*/
2930   if (scall == MAT_INITIAL_MATRIX){
2931     ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
2932     if (m == PETSC_DECIDE) {
2933       ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
2934     } else {
2935       ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
2936     }
2937     ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
2938     ierr = PetscMalloc(size*sizeof(int),&len_s);CHKERRQ(ierr);
2939     ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr);
2940   }
2941   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);}
2942   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2943 
2944   len_sa  = merge->len_sra;
2945   len_ra = merge->len_sra + size;
2946 
2947   if (scall == MAT_INITIAL_MATRIX){
2948     merge->nsend = 0;
2949     len = len_a = 0;
2950     for (proc=0; proc<size; proc++){
2951       len_s[proc] = len_sa[proc] = 0;
2952       if (proc == rank) continue;
2953       for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
2954         len_sa[proc] += ai[i+1] - ai[i]; /* num of cols sent to [proc] */
2955       }
2956       if (len_sa[proc]) {
2957         merge->nsend++;
2958         len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc];
2959         if (len < len_s[proc]) len = len_s[proc];
2960         if (len_a < len_sa[proc]) len_a = len_sa[proc];
2961       }
2962     }
2963 
2964     /* determine the number and length of messages to receive */
2965     /*--------------------------------------------------------*/
2966     ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
2967     ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr);
2968     /*
2969     ierr = PetscPrintf(PETSC_COMM_SELF," [%d] nsend: %d, nrecv: %d\n",rank,merge->nsend,merge->nrecv);
2970     for (i=0; i<merge->nrecv; i++){
2971       ierr = PetscPrintf(PETSC_COMM_SELF," [%d] expects recv len_r=%d from [%d]\n",rank,len_r[i],merge->id_r[i]);
2972     }
2973     */
2974 
2975     /* post the Irecvs corresponding to these messages */
2976     /*-------------------------------------------------*/
2977     /* get new tag to keep the communication clean */
2978     ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr);
2979     ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr);
2980 
2981     /* post the sends */
2982     /*----------------*/
2983     ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2984     ierr = PetscMalloc((len+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr);
2985     k = 0;
2986     for (proc=0; proc<size; proc++){
2987       if (!len_s[proc]) continue;
2988       /* form outgoing ij data to be sent to [proc] */
2989       nnz_ptr = ijbuf_s + owners[proc+1] - owners[proc];
2990       for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
2991         anzi = ai[i+1] - ai[i];
2992         if (i==owners[proc]){
2993           ijbuf_s[i-owners[proc]] = anzi;
2994         } else {
2995           ijbuf_s[i-owners[proc]] = ijbuf_s[i-owners[proc]-1]+ anzi;
2996         }
2997         for (j=0; j <anzi; j++){
2998           *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++;
2999         }
3000       }
3001       ierr = MPI_Isend(ijbuf_s,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr);
3002       k++;
3003     }
3004 
3005     /* receives and sends of ij-structure are complete, deallocate the buffers */
3006     /*----------------------------------------------------------------------*/
3007     ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
3008     ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
3009 
3010     ierr = PetscFree(ijbuf_s);CHKERRQ(ierr);
3011     ierr = PetscFree(s_waits);CHKERRQ(ierr);
3012     ierr = PetscFree(r_waits);CHKERRQ(ierr);
3013   }
3014 
3015   /* send and recv matrix values */
3016   /*-----------------------------*/
3017   if (scall == MAT_INITIAL_MATRIX){
3018     ierr = PetscMalloc((len_a+1)*sizeof(MatScalar),&merge->abuf_s);CHKERRQ(ierr);
3019     for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* length of seqmat->a to be received from a proc */
3020   }
3021   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
3022   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr);
3023 
3024   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr);
3025   k = 0;
3026   for (proc=0; proc<size; proc++){
3027     if (!len_sa[proc]) continue;
3028     /* form outgoing mat value data to be sent to [proc] */
3029     abuf_i = merge->abuf_s;
3030     for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
3031       anzi = ai[i+1] - ai[i];
3032       for (j=0; j <anzi; j++){
3033         *abuf_i = *(aa+ai[i]+j); abuf_i++;
3034       }
3035     }
3036     ierr = MPI_Isend(merge->abuf_s,len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr);
3037     k++;
3038   }
3039 
3040   ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr);
3041   ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr);
3042   ierr = PetscFree(status);CHKERRQ(ierr);
3043 
3044   ierr = PetscFree(s_waitsa);CHKERRQ(ierr);
3045   ierr = PetscFree(r_waitsa);CHKERRQ(ierr);
3046 
3047   /* create seq matrix B_seq in each processor */
3048   /*-------------------------------------------*/
3049   if (scall == MAT_INITIAL_MATRIX){
3050     ierr = PetscFree(len_s);CHKERRQ(ierr);
3051     /* allocate bi array and free space for accumulating nonzero column info */
3052     ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr);
3053     bi[0] = 0;
3054 
3055     /* create and initialize a linked list */
3056     nlnk = N+1;
3057     ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3058 
3059     /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */
3060     len = 0;
3061     for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i];
3062     ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr);
3063     current_space = free_space;
3064 
3065     /* determine symbolic info for each row of B_seq */
3066     for (i=0;i<m;i++) {
3067       bnzi   = 0;
3068       /* add local non-zero cols of this proc's seqmat into lnk */
3069       arow   = owners[rank] + i;
3070       anzi   = ai[arow+1] - ai[arow];
3071       aj     = a->j + ai[arow];
3072       ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3073       bnzi += nlnk;
3074       /* add received col data into lnk */
3075       for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3076         /* i-th row */
3077         if (i == 0){
3078           anzi = *(ijbuf_r[k]+i);
3079           aj   = ijbuf_r[k] + m;
3080         } else {
3081           anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3082           aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3083         }
3084         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3085         bnzi += nlnk;
3086       }
3087 
3088       /* if free space is not available, make more free space */
3089       if (current_space->local_remaining<bnzi) {
3090         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3091         nspacedouble++;
3092       }
3093       /* copy data into free space, then initialize lnk */
3094       ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3095       current_space->array           += bnzi;
3096       current_space->local_used      += bnzi;
3097       current_space->local_remaining -= bnzi;
3098 
3099       bi[i+1] = bi[i] + bnzi;
3100     }
3101     ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr);
3102     ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3103 
3104     ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3105     ierr = PetscFree(len_r);CHKERRQ(ierr);
3106 
3107     /* allocate space for ba */
3108     ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3109     ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3110 
3111     /* put together B_seq */
3112     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr);
3113     ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */
3114 
3115     /* create symbolic parallel matrix B_mpi by concatenating B_seq */
3116     /*--------------------------------------------------------------*/
3117     ierr = MatMerge(comm,B_seq,n,scall,&B_mpi);CHKERRQ(ierr);
3118   } /* end of if (scall == MAT_INITIAL_MATRIX) */
3119 
3120   /* insert mat values of B_mpi */
3121   /*----------------------------*/
3122   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3123 
3124   /* set values of ba */
3125   for (i=0; i<m; i++) {
3126     arow = owners[rank] + i;
3127     bj_i = bj+bi[i];  /* col indices of the i-th row of B_seq */
3128     bnzi = bi[i+1] - bi[i];
3129     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3130 
3131     /* add local non-zero vals of this proc's seqmat into ba */
3132     anzi = ai[arow+1] - ai[arow];
3133     aj   = a->j + ai[arow];
3134     aa   = a->a + ai[arow];
3135     nextaj = 0;
3136     for (j=0; nextaj<anzi; j++){
3137       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3138         ba_i[j] += aa[nextaj++];
3139       }
3140     }
3141 
3142     /* add received vals into ba */
3143     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3144       /* i-th row */
3145       if (i == 0){
3146         anzi = *(ijbuf_r[k]+i);
3147         aj   = ijbuf_r[k] + m;
3148         aa   = abuf_r[k];
3149       } else {
3150         anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3151         aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3152         aa   = abuf_r[k] + *(ijbuf_r[k]+i-1);
3153       }
3154       nextaj = 0;
3155       for (j=0; nextaj<anzi; j++){
3156         if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3157           ba_i[j] += aa[nextaj++];
3158         }
3159       }
3160     }
3161 
3162     ierr = MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3163   }
3164   ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3165   ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3166 
3167   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3168   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3169 
3170   /* attach the supporting struct to B_mpi for reuse */
3171   /*-------------------------------------------------*/
3172   if (scall == MAT_INITIAL_MATRIX){
3173     B_mpi->spptr         = (void*)merge;
3174     B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3175     merge->bi            = bi;
3176     merge->bj            = bj;
3177     merge->ijbuf_r       = ijbuf_r;
3178 
3179     *mpimat = B_mpi;
3180   }
3181   PetscFunctionReturn(0);
3182 }
3183 
3184 #undef __FUNCT__
3185 #define __FUNCT__ "MatGetLocalMat"
3186 /*@C
3187      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A
3188 
3189     Collective on Mat
3190 
3191    Input Parameters:
3192 +    A - the matrix
3193 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3194 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3195 
3196    Output Parameter:
3197 .    A_loc - the local sequential matrix generated
3198 
3199     Level: developer
3200 
3201 @*/
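/*
   A hedged usage sketch (hypothetical caller, not part of this file): gather
   the locally owned rows of a parallel MPIAIJ matrix A into the sequential
   matrix A_loc, letting the routine build the default row and column index
   sets by passing PETSC_NULL.

       Mat A_loc;
       ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
       ...  use A_loc; after A's values change, refresh it  ...
       ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
       ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/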
3202 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3203 {
3204   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3205   PetscErrorCode    ierr;
3206   int               *idx,i,start,end,ncols,nzA,nzB,*cmap,imark;
3207   IS                isrowa,iscola;
3208   Mat               *aloc;
3209 
3210   PetscFunctionBegin;
3211   if (!row){
3212     start = a->rstart; end = a->rend;
3213     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3214   } else {
3215     isrowa = *row;
3216   }
3217   if (!col){
3218     start = a->cstart;
3219     cmap  = a->garray;
3220     nzA   = a->A->n;
3221     nzB   = a->B->n;
3222     ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
3223     ncols = 0;
3224     for (i=0; i<nzB; i++) {
3225       if (cmap[i] < start) idx[ncols++] = cmap[i];
3226       else break;
3227     }
3228     imark = i;
3229     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3230     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3231     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3232     ierr = PetscFree(idx);CHKERRQ(ierr);
3233   } else {
3234     iscola = *col;
3235   }
3236   if (scall != MAT_INITIAL_MATRIX){
3237     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3238     aloc[0] = *A_loc;
3239   }
3240   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3241   *A_loc = aloc[0];
3242   ierr = PetscFree(aloc);CHKERRQ(ierr);
3243   if (!row){
3244     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3245   }
3246   if (!col){
3247     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3248   }
3249   PetscFunctionReturn(0);
3250 }
3251 
3252 #undef __FUNCT__
3253 #define __FUNCT__ "MatGetBrowsOfAcols"
3254 /*@C
3255      MatGetBrowsOfAcols - Creates a SeqAIJ matrix containing the rows of B that correspond to the nonzero columns of the local part of A
3256 
3257     Collective on Mat
3258 
3259    Input Parameters:
3260 +    A,B - the matrices
3261 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3262 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3263 
3264    Output Parameter:
3265 +    rowb, colb - index sets of rows and columns of B to extract
3266 .    brstart - row index of B_seq at which the next B->m rows are taken from B's own local rows
3267 -    B_seq - the sequential matrix generated
3268 
3269     Level: developer
3270 
3271 @*/
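/*
   A hedged usage sketch (hypothetical caller, not part of this file): collect
   into B_seq the rows of B that match the nonzero columns of A's local part,
   keeping the index sets so a later call with MAT_REUSE_MATRIX can refresh
   the numerical values.

       Mat      B_seq;
       IS       rowb = PETSC_NULL,colb = PETSC_NULL;
       PetscInt brstart;

       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
       ...  use B_seq, e.g. in a local matrix-matrix product  ...
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
       ierr = ISDestroy(rowb);CHKERRQ(ierr);
       ierr = ISDestroy(colb);CHKERRQ(ierr);
       ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/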
3272 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3273 {
3274   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3275   PetscErrorCode    ierr;
3276   int               *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3277   IS                isrowb,iscolb;
3278   Mat               *bseq;
3279 
3280   PetscFunctionBegin;
3281   if (a->cstart != b->rstart || a->cend != b->rend){
3282     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3283   }
3284 
3285   if (scall == MAT_INITIAL_MATRIX){
3286     start = a->cstart;
3287     cmap  = a->garray;
3288     nzA   = a->A->n;
3289     nzB   = a->B->n;
3290     ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
3291     ncols = 0;
3292     for (i=0; i<nzB; i++) {
3293       if (cmap[i] < start) idx[ncols++] = cmap[i];
3294       else break;
3295     }
3296     imark = i;
3297     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3298     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3299     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3300     ierr = PetscFree(idx);CHKERRQ(ierr);
3301     *brstart = imark;
3302     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3303   } else {
3304     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3305     isrowb = *rowb; iscolb = *colb;
3306     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3307     bseq[0] = *B_seq;
3308   }
3309   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3310   *B_seq = bseq[0];
3311   ierr = PetscFree(bseq);CHKERRQ(ierr);
3312   if (!rowb){
3313     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3314   } else {
3315     *rowb = isrowb;
3316   }
3317   if (!colb){
3318     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3319   } else {
3320     *colb = iscolb;
3321   }
3322 
3323   PetscFunctionReturn(0);
3324 }
3325