xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 901853e05a144ee1b7dab90d28db377fef86bcf8)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is defined this mapping is
9 scalable, at a slightly higher hash-table lookup cost; without it each processor
10 stores an order-N integer array, which is not scalable but is fast to access.
11 */
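/*
   A minimal lookup sketch (illustrative only; gcol and lcol are hypothetical names):
   once the colmap exists, a global column gcol of the off-diagonal block is
   translated to its local index lcol the same way it is done later in this file:

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr); lcol--;
     #else
       lcol = aij->colmap[gcol] - 1;
     #endif

   A result lcol < 0 means gcol does not (yet) appear in the off-diagonal part.
*/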
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   int        n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(int));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
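/*
   The two macros below insert a single (row,col,value) entry directly into the
   compressed-row storage of the local diagonal block (A) and off-diagonal block (B),
   respectively.  Each searches the row for the column (binary search narrowed by a
   short linear scan); if the column is not present and the row is already full, the
   entire local storage is reallocated with CHUNKSIZE extra slots for that row.
*/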
35 #define CHUNKSIZE   15
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                  ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (nonew == 1) goto a_noinsert; \
58       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
59       if (nrow >= rmax) { \
60         /* there is no extra room in row, therefore enlarge */ \
61         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
62         PetscScalar *new_a; \
63  \
64         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
65  \
66         /* malloc new storage space */ \
67         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
68         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
69         new_j   = (int*)(new_a + new_nz); \
70         new_i   = new_j + new_nz; \
71  \
72         /* copy over old data into new slots */ \
73         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
74         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
75         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
76         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
77         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
78                                                            len*sizeof(int));CHKERRQ(ierr); \
79         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
81                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
82         /* free up old matrix storage */ \
83  \
84         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
85         if (!a->singlemalloc) { \
86            ierr = PetscFree(a->i);CHKERRQ(ierr); \
87            ierr = PetscFree(a->j);CHKERRQ(ierr); \
88         } \
89         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
90         a->singlemalloc = PETSC_TRUE; \
91  \
92         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
93         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
94         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
95         a->maxnz += CHUNKSIZE; \
96         a->reallocs++; \
97       } \
98       N = nrow++ - 1; a->nz++; \
99       /* shift up all the later entries in this row */ \
100       for (ii=N; ii>=_i; ii--) { \
101         rp[ii+1] = rp[ii]; \
102         ap[ii+1] = ap[ii]; \
103       } \
104       rp[_i] = col1;  \
105       ap[_i] = value;  \
106       a_noinsert: ; \
107       ailen[row] = nrow; \
108 }
109 
110 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
111 { \
112  \
113     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
114     rmax = bimax[row]; nrow = bilen[row];  \
115     col1 = col - shift; \
116      \
117     low = 0; high = nrow; \
118     while (high-low > 5) { \
119       t = (low+high)/2; \
120       if (rp[t] > col) high = t; \
121       else             low  = t; \
122     } \
123        for (_i=low; _i<high; _i++) { \
124         if (rp[_i] > col1) break; \
125         if (rp[_i] == col1) { \
126           if (addv == ADD_VALUES) ap[_i] += value;   \
127           else                  ap[_i] = value; \
128           goto b_noinsert; \
129         } \
130       }  \
131       if (nonew == 1) goto b_noinsert; \
132       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
133       if (nrow >= rmax) { \
134         /* there is no extra room in row, therefore enlarge */ \
135         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
136         PetscScalar *new_a; \
137  \
138         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
139  \
140         /* malloc new storage space */ \
141         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
142         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
143         new_j   = (int*)(new_a + new_nz); \
144         new_i   = new_j + new_nz; \
145  \
146         /* copy over old data into new slots */ \
147         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
148         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
149         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
150         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
151         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
152                                                            len*sizeof(int));CHKERRQ(ierr); \
153         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
154         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
155                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
156         /* free up old matrix storage */ \
157  \
158         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
159         if (!b->singlemalloc) { \
160           ierr = PetscFree(b->i);CHKERRQ(ierr); \
161           ierr = PetscFree(b->j);CHKERRQ(ierr); \
162         } \
163         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
164         b->singlemalloc = PETSC_TRUE; \
165  \
166         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
167         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
168         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
169         b->maxnz += CHUNKSIZE; \
170         b->reallocs++; \
171       } \
172       N = nrow++ - 1; b->nz++; \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ap[ii+1] = ap[ii]; \
177       } \
178       rp[_i] = col1;  \
179       ap[_i] = value;  \
180       b_noinsert: ; \
181       bilen[row] = nrow; \
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatSetValues_MPIAIJ"
186 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
187 {
188   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
189   PetscScalar  value;
190   PetscErrorCode ierr;
191   int          i,j,rstart = aij->rstart,rend = aij->rend;
192   int          cstart = aij->cstart,cend = aij->cend,row,col;
193   PetscTruth   roworiented = aij->roworiented;
194 
195   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
196   Mat          A = aij->A;
197   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
198   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
199   PetscScalar  *aa = a->a;
200   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
201   Mat          B = aij->B;
202   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
203   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
204   PetscScalar  *ba = b->a;
205 
206   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
207   int          nonew = a->nonew,shift=0;
208   PetscScalar  *ap;
209 
210   PetscFunctionBegin;
211   for (i=0; i<m; i++) {
212     if (im[i] < 0) continue;
213 #if defined(PETSC_USE_BOPT_g)
214     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
215 #endif
216     if (im[i] >= rstart && im[i] < rend) {
217       row = im[i] - rstart;
218       for (j=0; j<n; j++) {
219         if (in[j] >= cstart && in[j] < cend){
220           col = in[j] - cstart;
221           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
222           if (ignorezeroentries && value == 0.0) continue;
223           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
224           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
225         } else if (in[j] < 0) continue;
226 #if defined(PETSC_USE_BOPT_g)
227         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
228 #endif
229         else {
230           if (mat->was_assembled) {
231             if (!aij->colmap) {
232               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
233             }
234 #if defined (PETSC_USE_CTABLE)
235             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
236 	    col--;
237 #else
238             col = aij->colmap[in[j]] - 1;
239 #endif
240             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
241               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
242               col =  in[j];
243               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
244               B = aij->B;
245               b = (Mat_SeqAIJ*)B->data;
246               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
247               ba = b->a;
248             }
249           } else col = in[j];
250           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
251           if (ignorezeroentries && value == 0.0) continue;
252           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
253           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
254         }
255       }
256     } else {
257       if (!aij->donotstash) {
258         if (roworiented) {
259           if (ignorezeroentries && v[i*n] == 0.0) continue;
260           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
261         } else {
262           if (ignorezeroentries && v[i] == 0.0) continue;
263           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
264         }
265       }
266     }
267   }
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatGetValues_MPIAIJ"
273 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
274 {
275   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
276   PetscErrorCode ierr;
277   int        i,j,rstart = aij->rstart,rend = aij->rend;
278   int        cstart = aij->cstart,cend = aij->cend,row,col;
279 
280   PetscFunctionBegin;
281   for (i=0; i<m; i++) {
282     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
283     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
284     if (idxm[i] >= rstart && idxm[i] < rend) {
285       row = idxm[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
288         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
289         if (idxn[j] >= cstart && idxn[j] < cend){
290           col = idxn[j] - cstart;
291           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
292         } else {
293           if (!aij->colmap) {
294             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
295           }
296 #if defined (PETSC_USE_CTABLE)
297           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
298           col --;
299 #else
300           col = aij->colmap[idxn[j]] - 1;
301 #endif
302           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
303           else {
304             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
305           }
306         }
307       }
308     } else {
309       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
310     }
311   }
312   PetscFunctionReturn(0);
313 }
314 
315 #undef __FUNCT__
316 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
317 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
318 {
319   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
320   PetscErrorCode ierr;
321   int         nstash,reallocs;
322   InsertMode  addv;
323 
324   PetscFunctionBegin;
325   if (aij->donotstash) {
326     PetscFunctionReturn(0);
327   }
328 
329   /* make sure all processors are using the same InsertMode, i.e. all INSERT_VALUES or all ADD_VALUES */
330   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
331   if (addv == (ADD_VALUES|INSERT_VALUES)) {
332     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
333   }
334   mat->insertmode = addv; /* in case this processor had no cache */
335 
336   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
337   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
338   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
339   PetscFunctionReturn(0);
340 }
341 
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
345 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
346 {
347   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
348   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
349   PetscErrorCode ierr;
350   int         i,j,rstart,ncols,n,flg;
351   int         *row,*col,other_disassembled;
352   PetscScalar *val;
353   InsertMode  addv = mat->insertmode;
354 
355   PetscFunctionBegin;
356   if (!aij->donotstash) {
357     while (1) {
358       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
359       if (!flg) break;
360 
361       for (i=0; i<n;) {
362         /* Now identify the consecutive values belonging to the same row */
363         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
364         if (j < n) ncols = j-i;
365         else       ncols = n-i;
366         /* Now assemble all these values with a single function call */
367         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
368         i = j;
369       }
370     }
371     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
372   }
373 
374   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
375   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
376 
377   /* determine whether any processor has disassembled; if so we must
378      also disassemble ourselves, so that everyone can reassemble consistently. */
379   /*
380      if the nonzero structure of submatrix B cannot change then no processor can have
381      disassembled and this check can be skipped; the MPI_PROD reduction below is nonzero only if every process still has was_assembled set
382   */
383   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
384     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
385     if (mat->was_assembled && !other_disassembled) {
386       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
387     }
388   }
389 
390   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
391     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
392   }
393   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
394   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
395 
396   if (aij->rowvalues) {
397     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
398     aij->rowvalues = 0;
399   }
400 
401   /* used by MatAXPY() */
402   a->xtoy = 0; b->xtoy = 0;
403   a->XtoY = 0; b->XtoY = 0;
404 
405   PetscFunctionReturn(0);
406 }
407 
408 #undef __FUNCT__
409 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
410 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
411 {
412   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
413   PetscErrorCode ierr;
414 
415   PetscFunctionBegin;
416   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
417   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatZeroRows_MPIAIJ"
423 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
424 {
425   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
426   PetscErrorCode ierr;
427   int            i,N,*rows,*owners = l->rowners,size = l->size;
428   int            *nprocs,j,idx,nsends,row;
429   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
430   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
431   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
432   MPI_Comm       comm = A->comm;
433   MPI_Request    *send_waits,*recv_waits;
434   MPI_Status     recv_status,*send_status;
435   IS             istmp;
436   PetscTruth     found;
437 
438   PetscFunctionBegin;
439   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
440   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
441 
442   /*  first count number of contributors to each processor */
443   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
444   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
445   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
446   for (i=0; i<N; i++) {
447     idx = rows[i];
448     found = PETSC_FALSE;
449     for (j=0; j<size; j++) {
450       if (idx >= owners[j] && idx < owners[j+1]) {
451         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
452       }
453     }
454     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
455   }
456   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
457 
458   /* inform other processors of number of messages and max length*/
459   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
460 
461   /* post receives:   */
462   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
463   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
464   for (i=0; i<nrecvs; i++) {
465     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
466   }
467 
468   /* do sends:
469       1) starts[i] gives the starting index in svalues of the row indices destined for
470          the ith processor
471   */
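  /*
     Worked example (illustrative values only): with size = 3 and per-process counts
     nprocs[0],nprocs[2],nprocs[4] = 2,0,1, the prefix sums give starts = {0,2,2}, so
     the two rows for process 0 are packed first and the single row for process 2 last.
  */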
472   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
473   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
474   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
475   starts[0] = 0;
476   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
477   for (i=0; i<N; i++) {
478     svalues[starts[owner[i]]++] = rows[i];
479   }
480   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
481 
482   starts[0] = 0;
483   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
484   count = 0;
485   for (i=0; i<size; i++) {
486     if (nprocs[2*i+1]) {
487       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
488     }
489   }
490   ierr = PetscFree(starts);CHKERRQ(ierr);
491 
492   base = owners[rank];
493 
494   /*  wait on receives */
495   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
496   source = lens + nrecvs;
497   count  = nrecvs; slen = 0;
498   while (count) {
499     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
500     /* unpack receives into our local space */
501     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
502     source[imdex]  = recv_status.MPI_SOURCE;
503     lens[imdex]    = n;
504     slen          += n;
505     count--;
506   }
507   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
508 
509   /* move the data into the send scatter */
510   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
511   count = 0;
512   for (i=0; i<nrecvs; i++) {
513     values = rvalues + i*nmax;
514     for (j=0; j<lens[i]; j++) {
515       lrows[count++] = values[j] - base;
516     }
517   }
518   ierr = PetscFree(rvalues);CHKERRQ(ierr);
519   ierr = PetscFree(lens);CHKERRQ(ierr);
520   ierr = PetscFree(owner);CHKERRQ(ierr);
521   ierr = PetscFree(nprocs);CHKERRQ(ierr);
522 
523   /* actually zap the local rows */
524   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
525   PetscLogObjectParent(A,istmp);
526 
527   /*
528         Zero the required rows. If the "diagonal block" of the matrix
529      is square and the user wishes to set the diagonal, we use separate
530      code so that MatSetValues() is not called for each diagonal entry,
531      which would repeatedly allocate new memory and slow things down.
532 
533        Contributed by: Matthew Knepley
534   */
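  /*
     A caller-side sketch (illustrative only; the variable names are hypothetical):
     zeroing a set of locally owned global rows and putting 1.0 on their diagonals:

       IS          zerois;
       PetscScalar one = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_WORLD,nzero,zerorows,&zerois);CHKERRQ(ierr);
       ierr = MatZeroRows(A,zerois,&one);CHKERRQ(ierr);
       ierr = ISDestroy(zerois);CHKERRQ(ierr);
  */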
535   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
536   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
537   if (diag && (l->A->M == l->A->N)) {
538     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
539   } else if (diag) {
540     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
541     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
542       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
543 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
544     }
545     for (i = 0; i < slen; i++) {
546       row  = lrows[i] + rstart;
547       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
548     }
549     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
550     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
551   } else {
552     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
553   }
554   ierr = ISDestroy(istmp);CHKERRQ(ierr);
555   ierr = PetscFree(lrows);CHKERRQ(ierr);
556 
557   /* wait on sends */
558   if (nsends) {
559     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
560     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
561     ierr = PetscFree(send_status);CHKERRQ(ierr);
562   }
563   ierr = PetscFree(send_waits);CHKERRQ(ierr);
564   ierr = PetscFree(svalues);CHKERRQ(ierr);
565 
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatMult_MPIAIJ"
571 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
572 {
573   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
574   PetscErrorCode ierr;
575   int        nt;
576 
577   PetscFunctionBegin;
578   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
579   if (nt != A->n) {
580     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
581   }
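  /* overlap communication with computation: begin gathering the needed off-process
     entries of xx into a->lvec, apply the diagonal block while the messages are in
     flight, complete the scatter, then add in the off-diagonal contribution */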
582   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
583   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
584   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatMultAdd_MPIAIJ"
591 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
592 {
593   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
594   PetscErrorCode ierr;
595 
596   PetscFunctionBegin;
597   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
598   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
599   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
601   PetscFunctionReturn(0);
602 }
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
606 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
607 {
608   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
609   PetscErrorCode ierr;
610 
611   PetscFunctionBegin;
612   /* do nondiagonal part */
613   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
614   /* send it on its way */
615   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
616   /* do local part */
617   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
618   /* receive remote parts: note this assumes the values are not actually */
619   /* inserted into yy until the VecScatterEnd() below, which is true for this */
620   /* implementation but may not always be the case. */
621   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 EXTERN_C_BEGIN
626 #undef __FUNCT__
627 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
628 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
629 {
630   MPI_Comm comm;
631   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
632   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
633   IS         Me,Notme;
634   PetscErrorCode ierr;
635   int        M,N,first,last,*notme,ntids,i;
636 
637   PetscFunctionBegin;
638 
639   /* Easy test: symmetric diagonal block */
640   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
641   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
642   if (!*f) PetscFunctionReturn(0);
643   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
644   ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
645   if (ntids==1) PetscFunctionReturn(0);
646 
647   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
648   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
649   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
650   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
651   for (i=0; i<first; i++) notme[i] = i;
652   for (i=last; i<M; i++) notme[i-last+first] = i;
653   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
654   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
655   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
656   Aoff = Aoffs[0];
657   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
658   Boff = Boffs[0];
659   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
660   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
661   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
662   ierr = ISDestroy(Me);CHKERRQ(ierr);
663   ierr = ISDestroy(Notme);CHKERRQ(ierr);
664 
665   PetscFunctionReturn(0);
666 }
667 EXTERN_C_END
668 
669 #undef __FUNCT__
670 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
671 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
672 {
673   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
674   PetscErrorCode ierr;
675 
676   PetscFunctionBegin;
677   /* do nondiagonal part */
678   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
679   /* send it on its way */
680   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
681   /* do local part */
682   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
683   /* receive remote parts: note this assumes the values are not actually */
684   /* inserted into zz until the VecScatterEnd() below, which is true for this */
685   /* implementation but may not always be the case. */
686   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 /*
691   This only works correctly for square matrices where the subblock A->A is the
692    diagonal block
693 */
694 #undef __FUNCT__
695 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
696 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
697 {
698   PetscErrorCode ierr;
699   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
700 
701   PetscFunctionBegin;
702   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
703   if (a->rstart != a->cstart || a->rend != a->cend) {
704     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
705   }
706   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
707   PetscFunctionReturn(0);
708 }
709 
710 #undef __FUNCT__
711 #define __FUNCT__ "MatScale_MPIAIJ"
712 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
713 {
714   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
715   PetscErrorCode ierr;
716 
717   PetscFunctionBegin;
718   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
719   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatDestroy_MPIAIJ"
725 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
726 {
727   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
728   PetscErrorCode ierr;
729 
730   PetscFunctionBegin;
731 #if defined(PETSC_USE_LOG)
732   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
733 #endif
734   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
735   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
736   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
737   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
738 #if defined (PETSC_USE_CTABLE)
739   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
740 #else
741   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
742 #endif
743   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
744   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
745   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
746   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
747   ierr = PetscFree(aij);CHKERRQ(ierr);
748 
749   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
750   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
751   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
752   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
753   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
754   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
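/*
   Writes the matrix in PETSc binary format, gathered to process 0 which does all the
   file I/O: a four-entry header (MAT_FILE_COOKIE, global rows M, global columns N,
   total number of nonzeros), followed by all row lengths, then all global column
   indices, then all numerical values.
*/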
758 #undef __FUNCT__
759 #define __FUNCT__ "MatView_MPIAIJ_Binary"
760 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
761 {
762   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
763   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
764   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
765   PetscErrorCode    ierr;
766   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
767   int               fd;
768   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
769   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
770   PetscScalar       *column_values;
771 
772   PetscFunctionBegin;
773   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
774   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
775   nz   = A->nz + B->nz;
776   if (!rank) {
777     header[0] = MAT_FILE_COOKIE;
778     header[1] = mat->M;
779     header[2] = mat->N;
780     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
781     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
782     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
783     /* get largest number of rows any processor has */
784     rlen = mat->m;
785     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = PetscMax(rlen,range[i+1] - range[i]);
788     }
789   } else {
790     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
791     rlen = mat->m;
792   }
793 
794   /* load up the local row counts */
795   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
796   for (i=0; i<mat->m; i++) {
797     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
798   }
799 
800   /* store the row lengths to the file */
801   if (!rank) {
802     MPI_Status status;
803     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
804     for (i=1; i<size; i++) {
805       rlen = range[i+1] - range[i];
806       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
807       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
808     }
809   } else {
810     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
811   }
812   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
813 
814   /* load up the local column indices */
815   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
816   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
817   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
818   cnt  = 0;
819   for (i=0; i<mat->m; i++) {
820     for (j=B->i[i]; j<B->i[i+1]; j++) {
821       if ( (col = garray[B->j[j]]) > cstart) break;
822       column_indices[cnt++] = col;
823     }
824     for (k=A->i[i]; k<A->i[i+1]; k++) {
825       column_indices[cnt++] = A->j[k] + cstart;
826     }
827     for (; j<B->i[i+1]; j++) {
828       column_indices[cnt++] = garray[B->j[j]];
829     }
830   }
831   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
832 
833   /* store the column indices to the file */
834   if (!rank) {
835     MPI_Status status;
836     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
837     for (i=1; i<size; i++) {
838       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
839       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
840       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
841       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
842     }
843   } else {
844     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
845     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
846   }
847   ierr = PetscFree(column_indices);CHKERRQ(ierr);
848 
849   /* load up the local column values */
850   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
851   cnt  = 0;
852   for (i=0; i<mat->m; i++) {
853     for (j=B->i[i]; j<B->i[i+1]; j++) {
854       if ( garray[B->j[j]] > cstart) break;
855       column_values[cnt++] = B->a[j];
856     }
857     for (k=A->i[i]; k<A->i[i+1]; k++) {
858       column_values[cnt++] = A->a[k];
859     }
860     for (; j<B->i[i+1]; j++) {
861       column_values[cnt++] = B->a[j];
862     }
863   }
864   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
865 
866   /* store the column values to the file */
867   if (!rank) {
868     MPI_Status status;
869     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
870     for (i=1; i<size; i++) {
871       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
872       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
873       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
874       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
875     }
876   } else {
877     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
878     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
879   }
880   ierr = PetscFree(column_values);CHKERRQ(ierr);
881   PetscFunctionReturn(0);
882 }
883 
884 #undef __FUNCT__
885 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
886 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
887 {
888   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
889   PetscErrorCode    ierr;
890   PetscMPIInt       rank = aij->rank,size = aij->size;
891   PetscTruth        isdraw,iascii,flg,isbinary;
892   PetscViewer       sviewer;
893   PetscViewerFormat format;
894 
895   PetscFunctionBegin;
896   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
897   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
898   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
899   if (iascii) {
900     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
901     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
902       MatInfo info;
903       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
904       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
905       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
906       if (flg) {
907         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
908 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
909       } else {
910         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
911 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
912       }
913       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
914       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
915       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
916       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
917       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
918       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
919       PetscFunctionReturn(0);
920     } else if (format == PETSC_VIEWER_ASCII_INFO) {
921       PetscFunctionReturn(0);
922     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
923       PetscFunctionReturn(0);
924     }
925   } else if (isbinary) {
926     if (size == 1) {
927       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
928       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
929     } else {
930       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
931     }
932     PetscFunctionReturn(0);
933   } else if (isdraw) {
934     PetscDraw  draw;
935     PetscTruth isnull;
936     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
937     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
938   }
939 
940   if (size == 1) {
941     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
942     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
943   } else {
944     /* assemble the entire matrix onto first processor. */
945     Mat         A;
946     Mat_SeqAIJ *Aloc;
947     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
948     PetscScalar *a;
949 
950     if (!rank) {
951       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
952     } else {
953       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
954     }
955     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
956     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
957     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
958     PetscLogObjectParent(mat,A);
959 
960     /* copy over the A part */
961     Aloc = (Mat_SeqAIJ*)aij->A->data;
962     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
963     row = aij->rstart;
964     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
965     for (i=0; i<m; i++) {
966       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
967       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
968     }
969     aj = Aloc->j;
970     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
971 
972     /* copy over the B part */
973     Aloc = (Mat_SeqAIJ*)aij->B->data;
974     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
975     row  = aij->rstart;
976     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
977     ct   = cols;
978     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
979     for (i=0; i<m; i++) {
980       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
981       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
982     }
983     ierr = PetscFree(ct);CHKERRQ(ierr);
984     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
985     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
986     /*
987        Everyone has to participate in this call to draw the matrix since the graphics waits are
988        synchronized across all processors that share the PetscDraw object
989     */
990     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
991     if (!rank) {
992       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
993       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
994     }
995     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
996     ierr = MatDestroy(A);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MatView_MPIAIJ"
1003 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1004 {
1005   PetscErrorCode ierr;
1006   PetscTruth iascii,isdraw,issocket,isbinary;
1007 
1008   PetscFunctionBegin;
1009   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1010   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1011   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1012   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1013   if (iascii || isdraw || isbinary || issocket) {
1014     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1015   } else {
1016     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1017   }
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 
1022 
1023 #undef __FUNCT__
1024 #define __FUNCT__ "MatRelax_MPIAIJ"
1025 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1026 {
1027   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1028   PetscErrorCode ierr;
1029   Vec          bb1;
1030   PetscScalar  mone=-1.0;
1031 
1032   PetscFunctionBegin;
1033   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1034 
1035   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1036 
1037   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1038     if (flag & SOR_ZERO_INITIAL_GUESS) {
1039       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1040       its--;
1041     }
1042 
1043     while (its--) {
1044       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1045       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1046 
1047       /* update rhs: bb1 = bb - B*x */
1048       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1049       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1050 
1051       /* local sweep */
1052       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1053       CHKERRQ(ierr);
1054     }
1055   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1056     if (flag & SOR_ZERO_INITIAL_GUESS) {
1057       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1058       its--;
1059     }
1060     while (its--) {
1061       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1062       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1063 
1064       /* update rhs: bb1 = bb - B*x */
1065       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1066       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1067 
1068       /* local sweep */
1069       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1070       CHKERRQ(ierr);
1071     }
1072   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1073     if (flag & SOR_ZERO_INITIAL_GUESS) {
1074       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1075       its--;
1076     }
1077     while (its--) {
1078       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1079       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1080 
1081       /* update rhs: bb1 = bb - B*x */
1082       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1083       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1084 
1085       /* local sweep */
1086       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1087       CHKERRQ(ierr);
1088     }
1089   } else {
1090     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1091   }
1092 
1093   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1099 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1100 {
1101   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1102   Mat        A = mat->A,B = mat->B;
1103   PetscErrorCode ierr;
1104   PetscReal  isend[5],irecv[5];
1105 
1106   PetscFunctionBegin;
1107   info->block_size     = 1.0;
1108   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1109   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1110   isend[3] = info->memory;  isend[4] = info->mallocs;
1111   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1112   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1113   isend[3] += info->memory;  isend[4] += info->mallocs;
1114   if (flag == MAT_LOCAL) {
1115     info->nz_used      = isend[0];
1116     info->nz_allocated = isend[1];
1117     info->nz_unneeded  = isend[2];
1118     info->memory       = isend[3];
1119     info->mallocs      = isend[4];
1120   } else if (flag == MAT_GLOBAL_MAX) {
1121     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1122     info->nz_used      = irecv[0];
1123     info->nz_allocated = irecv[1];
1124     info->nz_unneeded  = irecv[2];
1125     info->memory       = irecv[3];
1126     info->mallocs      = irecv[4];
1127   } else if (flag == MAT_GLOBAL_SUM) {
1128     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1129     info->nz_used      = irecv[0];
1130     info->nz_allocated = irecv[1];
1131     info->nz_unneeded  = irecv[2];
1132     info->memory       = irecv[3];
1133     info->mallocs      = irecv[4];
1134   }
1135   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1136   info->fill_ratio_needed = 0;
1137   info->factor_mallocs    = 0;
1138   info->rows_global       = (double)matin->M;
1139   info->columns_global    = (double)matin->N;
1140   info->rows_local        = (double)matin->m;
1141   info->columns_local     = (double)matin->N;
1142 
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatSetOption_MPIAIJ"
1148 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1149 {
1150   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1151   PetscErrorCode ierr;
1152 
1153   PetscFunctionBegin;
1154   switch (op) {
1155   case MAT_NO_NEW_NONZERO_LOCATIONS:
1156   case MAT_YES_NEW_NONZERO_LOCATIONS:
1157   case MAT_COLUMNS_UNSORTED:
1158   case MAT_COLUMNS_SORTED:
1159   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1160   case MAT_KEEP_ZEROED_ROWS:
1161   case MAT_NEW_NONZERO_LOCATION_ERR:
1162   case MAT_USE_INODES:
1163   case MAT_DO_NOT_USE_INODES:
1164   case MAT_IGNORE_ZERO_ENTRIES:
1165     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1166     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1167     break;
1168   case MAT_ROW_ORIENTED:
1169     a->roworiented = PETSC_TRUE;
1170     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1171     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1172     break;
1173   case MAT_ROWS_SORTED:
1174   case MAT_ROWS_UNSORTED:
1175   case MAT_YES_NEW_DIAGONALS:
1176     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1177     break;
1178   case MAT_COLUMN_ORIENTED:
1179     a->roworiented = PETSC_FALSE;
1180     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1181     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1182     break;
1183   case MAT_IGNORE_OFF_PROC_ENTRIES:
1184     a->donotstash = PETSC_TRUE;
1185     break;
1186   case MAT_NO_NEW_DIAGONALS:
1187     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1188   case MAT_SYMMETRIC:
1189   case MAT_STRUCTURALLY_SYMMETRIC:
1190   case MAT_HERMITIAN:
1191   case MAT_SYMMETRY_ETERNAL:
1192     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1193     break;
1194   case MAT_NOT_SYMMETRIC:
1195   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1196   case MAT_NOT_HERMITIAN:
1197   case MAT_NOT_SYMMETRY_ETERNAL:
1198     break;
1199   default:
1200     SETERRQ(PETSC_ERR_SUP,"unknown option");
1201   }
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatGetRow_MPIAIJ"
1207 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1208 {
1209   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1210   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1211   PetscErrorCode ierr;
1212   int          i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1213   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1214   int          *cmap,*idx_p;
1215 
1216   PetscFunctionBegin;
1217   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1218   mat->getrowactive = PETSC_TRUE;
1219 
1220   if (!mat->rowvalues && (idx || v)) {
1221     /*
1222         allocate enough space to hold information from the longest row.
1223     */
1224     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1225     int     max = 1,tmp;
1226     for (i=0; i<matin->m; i++) {
1227       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1228       if (max < tmp) { max = tmp; }
1229     }
1230     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1231     mat->rowindices = (int*)(mat->rowvalues + max);
1232   }
1233 
1234   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1235   lrow = row - rstart;
1236 
1237   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1238   if (!v)   {pvA = 0; pvB = 0;}
1239   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1240   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1241   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1242   nztot = nzA + nzB;
1243 
1244   cmap  = mat->garray;
1245   if (v  || idx) {
1246     if (nztot) {
1247       /* Merge into increasing column order (A and B are each already sorted): B entries with global column < cstart first, then all of A, then the remaining B entries */
1248       int imark = -1;
1249       if (v) {
1250         *v = v_p = mat->rowvalues;
1251         for (i=0; i<nzB; i++) {
1252           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1253           else break;
1254         }
1255         imark = i;
1256         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1257         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1258       }
1259       if (idx) {
1260         *idx = idx_p = mat->rowindices;
1261         if (imark > -1) {
1262           for (i=0; i<imark; i++) {
1263             idx_p[i] = cmap[cworkB[i]];
1264           }
1265         } else {
1266           for (i=0; i<nzB; i++) {
1267             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1268             else break;
1269           }
1270           imark = i;
1271         }
1272         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1273         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1274       }
1275     } else {
1276       if (idx) *idx = 0;
1277       if (v)   *v   = 0;
1278     }
1279   }
1280   *nz = nztot;
1281   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1282   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1288 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1289 {
1290   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1291 
1292   PetscFunctionBegin;
1293   if (aij->getrowactive == PETSC_FALSE) {
1294     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1295   }
1296   aij->getrowactive = PETSC_FALSE;
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 #undef __FUNCT__
1301 #define __FUNCT__ "MatNorm_MPIAIJ"
1302 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1303 {
1304   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1305   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1306   PetscErrorCode ierr;
1307   int          i,j,cstart = aij->cstart;
1308   PetscReal    sum = 0.0;
1309   PetscScalar  *v;
1310 
1311   PetscFunctionBegin;
1312   if (aij->size == 1) {
1313     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1314   } else {
1315     if (type == NORM_FROBENIUS) {
1316       v = amat->a;
1317       for (i=0; i<amat->nz; i++) {
1318 #if defined(PETSC_USE_COMPLEX)
1319         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1320 #else
1321         sum += (*v)*(*v); v++;
1322 #endif
1323       }
1324       v = bmat->a;
1325       for (i=0; i<bmat->nz; i++) {
1326 #if defined(PETSC_USE_COMPLEX)
1327         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1328 #else
1329         sum += (*v)*(*v); v++;
1330 #endif
1331       }
1332       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1333       *norm = sqrt(*norm);
1334     } else if (type == NORM_1) { /* max column norm */
1335       PetscReal *tmp,*tmp2;
1336       int    *jj,*garray = aij->garray;
1337       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1338       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1339       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1340       *norm = 0.0;
1341       v = amat->a; jj = amat->j;
1342       for (j=0; j<amat->nz; j++) {
1343         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1344       }
1345       v = bmat->a; jj = bmat->j;
1346       for (j=0; j<bmat->nz; j++) {
1347         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1348       }
1349       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1350       for (j=0; j<mat->N; j++) {
1351         if (tmp2[j] > *norm) *norm = tmp2[j];
1352       }
1353       ierr = PetscFree(tmp);CHKERRQ(ierr);
1354       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1355     } else if (type == NORM_INFINITY) { /* max row norm */
1356       PetscReal ntemp = 0.0;
1357       for (j=0; j<aij->A->m; j++) {
1358         v = amat->a + amat->i[j];
1359         sum = 0.0;
1360         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1361           sum += PetscAbsScalar(*v); v++;
1362         }
1363         v = bmat->a + bmat->i[j];
1364         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1365           sum += PetscAbsScalar(*v); v++;
1366         }
1367         if (sum > ntemp) ntemp = sum;
1368       }
1369       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1370     } else {
1371       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1372     }
1373   }
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatTranspose_MPIAIJ"
1379 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1380 {
1381   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1382   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1383   PetscErrorCode ierr;
1384   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1385   Mat          B;
1386   PetscScalar  *array;
1387 
1388   PetscFunctionBegin;
1389   if (!matout && M != N) {
1390     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1391   }
1392 
1393   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1394   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1395   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1396 
1397   /* copy over the A part */
1398   Aloc = (Mat_SeqAIJ*)a->A->data;
1399   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1400   row = a->rstart;
1401   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1402   for (i=0; i<m; i++) {
1403     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1404     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1405   }
1406   aj = Aloc->j;
1407   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1408 
1409   /* copy over the B part */
1410   Aloc = (Mat_SeqAIJ*)a->B->data;
1411   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1412   row  = a->rstart;
1413   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1414   ct   = cols;
1415   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1416   for (i=0; i<m; i++) {
1417     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1418     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1419   }
1420   ierr = PetscFree(ct);CHKERRQ(ierr);
1421   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1422   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1423   if (matout) {
1424     *matout = B;
1425   } else {
1426     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1427   }
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1433 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1434 {
1435   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1436   Mat        a = aij->A,b = aij->B;
1437   PetscErrorCode ierr;
1438   int        s1,s2,s3;
1439 
1440   PetscFunctionBegin;
1441   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1442   if (rr) {
1443     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1444     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1445     /* Overlap communication with computation. */
1446     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1447   }
1448   if (ll) {
1449     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1450     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1451     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1452   }
1453   /* scale the diagonal block */
1454   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1455 
1456   if (rr) {
1457     /* Do a scatter end and then right scale the off-diagonal block */
1458     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1459     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1460   }
1461 
1462   PetscFunctionReturn(0);
1463 }
1464 
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1468 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1469 {
1470   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1471   PetscErrorCode ierr;
1472 
1473   PetscFunctionBegin;
1474   if (!a->rank) {
1475     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1476   }
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1482 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1483 {
1484   PetscFunctionBegin;
1485   *bs = 1;
1486   PetscFunctionReturn(0);
1487 }
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1490 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1491 {
1492   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1493   PetscErrorCode ierr;
1494 
1495   PetscFunctionBegin;
1496   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatEqual_MPIAIJ"
1502 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1503 {
1504   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1505   Mat        a,b,c,d;
1506   PetscTruth flg;
1507   PetscErrorCode ierr;
1508 
1509   PetscFunctionBegin;
1510   a = matA->A; b = matA->B;
1511   c = matB->A; d = matB->B;
1512 
1513   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1514   if (flg == PETSC_TRUE) {
1515     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1516   }
1517   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatCopy_MPIAIJ"
1523 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1524 {
1525   PetscErrorCode ierr;
1526   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1527   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1528 
1529   PetscFunctionBegin;
1530   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1531   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1532     /* because of the column compression in the off-processor part of the matrix a->B,
1533        the number of columns in a->B and b->B may be different, hence we cannot call
1534        MatCopy() directly on the two parts. If need be, we can provide a more
1535        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1536        and then copying the submatrices */
1537     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1538   } else {
1539     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1540     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1541   }
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 #undef __FUNCT__
1546 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1547 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1548 {
1549   PetscErrorCode ierr;
1550 
1551   PetscFunctionBegin;
1552   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 #include "petscblaslapack.h"
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MatAXPY_MPIAIJ"
1559 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1560 {
1561   PetscErrorCode ierr;
1562   int            i;
1563   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1564   PetscBLASInt   bnz,one=1;
1565   Mat_SeqAIJ     *x,*y;
1566 
1567   PetscFunctionBegin;
1568   if (str == SAME_NONZERO_PATTERN) {
1569     x = (Mat_SeqAIJ *)xx->A->data;
1570     y = (Mat_SeqAIJ *)yy->A->data;
1571     bnz = (PetscBLASInt)x->nz;
1572     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1573     x = (Mat_SeqAIJ *)xx->B->data;
1574     y = (Mat_SeqAIJ *)yy->B->data;
1575     bnz = (PetscBLASInt)x->nz;
1576     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1577   } else if (str == SUBSET_NONZERO_PATTERN) {
1578     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1579 
1580     x = (Mat_SeqAIJ *)xx->B->data;
1581     y = (Mat_SeqAIJ *)yy->B->data;
1582     if (y->xtoy && y->XtoY != xx->B) {
1583       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1584       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1585     }
1586     if (!y->xtoy) { /* get xtoy */
1587       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1588       y->XtoY = xx->B;
1589     }
1590     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1591   } else {
1592     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1593   }
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 /* -------------------------------------------------------------------*/
1598 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1599        MatGetRow_MPIAIJ,
1600        MatRestoreRow_MPIAIJ,
1601        MatMult_MPIAIJ,
1602 /* 4*/ MatMultAdd_MPIAIJ,
1603        MatMultTranspose_MPIAIJ,
1604        MatMultTransposeAdd_MPIAIJ,
1605        0,
1606        0,
1607        0,
1608 /*10*/ 0,
1609        0,
1610        0,
1611        MatRelax_MPIAIJ,
1612        MatTranspose_MPIAIJ,
1613 /*15*/ MatGetInfo_MPIAIJ,
1614        MatEqual_MPIAIJ,
1615        MatGetDiagonal_MPIAIJ,
1616        MatDiagonalScale_MPIAIJ,
1617        MatNorm_MPIAIJ,
1618 /*20*/ MatAssemblyBegin_MPIAIJ,
1619        MatAssemblyEnd_MPIAIJ,
1620        0,
1621        MatSetOption_MPIAIJ,
1622        MatZeroEntries_MPIAIJ,
1623 /*25*/ MatZeroRows_MPIAIJ,
1624 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1625        MatLUFactorSymbolic_MPIAIJ_TFS,
1626 #else
1627        0,
1628 #endif
1629        0,
1630        0,
1631        0,
1632 /*30*/ MatSetUpPreallocation_MPIAIJ,
1633        0,
1634        0,
1635        0,
1636        0,
1637 /*35*/ MatDuplicate_MPIAIJ,
1638        0,
1639        0,
1640        0,
1641        0,
1642 /*40*/ MatAXPY_MPIAIJ,
1643        MatGetSubMatrices_MPIAIJ,
1644        MatIncreaseOverlap_MPIAIJ,
1645        MatGetValues_MPIAIJ,
1646        MatCopy_MPIAIJ,
1647 /*45*/ MatPrintHelp_MPIAIJ,
1648        MatScale_MPIAIJ,
1649        0,
1650        0,
1651        0,
1652 /*50*/ MatGetBlockSize_MPIAIJ,
1653        0,
1654        0,
1655        0,
1656        0,
1657 /*55*/ MatFDColoringCreate_MPIAIJ,
1658        0,
1659        MatSetUnfactored_MPIAIJ,
1660        0,
1661        0,
1662 /*60*/ MatGetSubMatrix_MPIAIJ,
1663        MatDestroy_MPIAIJ,
1664        MatView_MPIAIJ,
1665        MatGetPetscMaps_Petsc,
1666        0,
1667 /*65*/ 0,
1668        0,
1669        0,
1670        0,
1671        0,
1672 /*70*/ 0,
1673        0,
1674        MatSetColoring_MPIAIJ,
1675        MatSetValuesAdic_MPIAIJ,
1676        MatSetValuesAdifor_MPIAIJ,
1677 /*75*/ 0,
1678        0,
1679        0,
1680        0,
1681        0,
1682 /*80*/ 0,
1683        0,
1684        0,
1685        0,
1686 /*84*/ MatLoad_MPIAIJ,
1687        0,
1688        0,
1689        0,
1690        0,
1691        0,
1692 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1693        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1694        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1695        MatPtAP_MPIAIJ_MPIAIJ,
1696        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1697 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1698        0,
1699        0,
1700        0};
1701 
1702 /* ----------------------------------------------------------------------------------------*/
1703 
1704 EXTERN_C_BEGIN
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1707 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1708 {
1709   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1710   PetscErrorCode ierr;
1711 
1712   PetscFunctionBegin;
1713   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1714   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1715   PetscFunctionReturn(0);
1716 }
1717 EXTERN_C_END
1718 
1719 EXTERN_C_BEGIN
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1722 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1723 {
1724   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1725   PetscErrorCode ierr;
1726 
1727   PetscFunctionBegin;
1728   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1729   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1730   PetscFunctionReturn(0);
1731 }
1732 EXTERN_C_END
1733 
1734 #include "petscpc.h"
1735 EXTERN_C_BEGIN
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1738 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1739 {
1740   Mat_MPIAIJ   *b;
1741   PetscErrorCode ierr;
1742   int          i;
1743 
1744   PetscFunctionBegin;
1745   B->preallocated = PETSC_TRUE;
1746   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1747   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1748   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1749   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1750   if (d_nnz) {
1751     for (i=0; i<B->m; i++) {
1752       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1753     }
1754   }
1755   if (o_nnz) {
1756     for (i=0; i<B->m; i++) {
1757       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1758     }
1759   }
1760   b = (Mat_MPIAIJ*)B->data;
1761   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1762   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1763 
1764   PetscFunctionReturn(0);
1765 }
1766 EXTERN_C_END
1767 
1768 /*MC
1769    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1770 
1771    Options Database Keys:
1772 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1773 
1774   Level: beginner
1775 
1776 .seealso: MatCreateMPIAIJ
1777 M*/
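
/*
   A minimal usage sketch for the manual page above; the sizes are placeholders and
   MatSetFromOptions() is what honors the -mat_type mpiaij option.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                      /* picks up -mat_type mpiaij */
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/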
1778 
1779 EXTERN_C_BEGIN
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatCreate_MPIAIJ"
1782 PetscErrorCode MatCreate_MPIAIJ(Mat B)
1783 {
1784   Mat_MPIAIJ *b;
1785   PetscErrorCode ierr;
1786   int        i,size;
1787 
1788   PetscFunctionBegin;
1789   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1790 
1791   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1792   B->data         = (void*)b;
1793   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1794   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1795   B->factor       = 0;
1796   B->assembled    = PETSC_FALSE;
1797   B->mapping      = 0;
1798 
1799   B->insertmode      = NOT_SET_VALUES;
1800   b->size            = size;
1801   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1802 
1803   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1804   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1805 
1806   /* the information in the maps duplicates the information computed below; eventually
1807      we should remove the duplicated information that is not contained in the maps */
1808   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1809   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1810 
1811   /* build local table of row and column ownerships */
1812   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1813   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1814   b->cowners = b->rowners + b->size + 2;
1815   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1816   b->rowners[0] = 0;
1817   for (i=2; i<=b->size; i++) {
1818     b->rowners[i] += b->rowners[i-1];
1819   }
1820   b->rstart = b->rowners[b->rank];
1821   b->rend   = b->rowners[b->rank+1];
1822   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1823   b->cowners[0] = 0;
1824   for (i=2; i<=b->size; i++) {
1825     b->cowners[i] += b->cowners[i-1];
1826   }
1827   b->cstart = b->cowners[b->rank];
1828   b->cend   = b->cowners[b->rank+1];
1829 
1830   /* build cache for off array entries formed */
1831   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1832   b->donotstash  = PETSC_FALSE;
1833   b->colmap      = 0;
1834   b->garray      = 0;
1835   b->roworiented = PETSC_TRUE;
1836 
1837   /* stuff used for matrix vector multiply */
1838   b->lvec      = PETSC_NULL;
1839   b->Mvctx     = PETSC_NULL;
1840 
1841   /* stuff for MatGetRow() */
1842   b->rowindices   = 0;
1843   b->rowvalues    = 0;
1844   b->getrowactive = PETSC_FALSE;
1845 
1846   /* Explicitly create 2 MATSEQAIJ matrices. */
1847   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1848   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1849   PetscLogObjectParent(B,b->A);
1850   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1851   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1852   PetscLogObjectParent(B,b->B);
1853 
1854   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1855                                      "MatStoreValues_MPIAIJ",
1856                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1857   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1858                                      "MatRetrieveValues_MPIAIJ",
1859                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1860   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1861 				     "MatGetDiagonalBlock_MPIAIJ",
1862                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1863   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1864 				     "MatIsTranspose_MPIAIJ",
1865 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
1866   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1867 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1868 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
1869   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1870 				     "MatDiagonalScaleLocal_MPIAIJ",
1871 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
1872   PetscFunctionReturn(0);
1873 }
1874 EXTERN_C_END
1875 
1876 /*MC
1877    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1878 
1879    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1880    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1881    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
1882    for communicators controlling multiple processes.  It is recommended that you call both of
1883    the above preallocation routines for simplicity.
1884 
1885    Options Database Keys:
1886 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1887 
1888   Level: beginner
1889 
1890 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1891 M*/
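
/*
   A minimal sketch of the recommendation above: call both preallocation routines so the same
   code works for any communicator size; each routine only acts when the composed function for
   the actual type is present (compare MatMPIAIJSetPreallocation() below).  comm, M and N are
   placeholders.

     Mat A;
     ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/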
1892 
1893 EXTERN_C_BEGIN
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatCreate_AIJ"
1896 PetscErrorCode MatCreate_AIJ(Mat A)
1897 {
1898   PetscErrorCode ierr;
1899   int size;
1900 
1901   PetscFunctionBegin;
1902   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1903   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1904   if (size == 1) {
1905     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1906   } else {
1907     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1908   }
1909   PetscFunctionReturn(0);
1910 }
1911 EXTERN_C_END
1912 
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1915 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1916 {
1917   Mat        mat;
1918   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1919   PetscErrorCode ierr;
1920 
1921   PetscFunctionBegin;
1922   *newmat       = 0;
1923   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1924   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1925   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1926   a    = (Mat_MPIAIJ*)mat->data;
1927 
1928   mat->factor       = matin->factor;
1929   mat->assembled    = PETSC_TRUE;
1930   mat->insertmode   = NOT_SET_VALUES;
1931   mat->preallocated = PETSC_TRUE;
1932 
1933   a->rstart       = oldmat->rstart;
1934   a->rend         = oldmat->rend;
1935   a->cstart       = oldmat->cstart;
1936   a->cend         = oldmat->cend;
1937   a->size         = oldmat->size;
1938   a->rank         = oldmat->rank;
1939   a->donotstash   = oldmat->donotstash;
1940   a->roworiented  = oldmat->roworiented;
1941   a->rowindices   = 0;
1942   a->rowvalues    = 0;
1943   a->getrowactive = PETSC_FALSE;
1944 
1945   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1946   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1947   if (oldmat->colmap) {
1948 #if defined (PETSC_USE_CTABLE)
1949     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1950 #else
1951     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1952     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1953     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1954 #endif
1955   } else a->colmap = 0;
1956   if (oldmat->garray) {
1957     int len;
1958     len  = oldmat->B->n;
1959     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1960     PetscLogObjectMemory(mat,len*sizeof(int));
1961     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1962   } else a->garray = 0;
1963 
1964   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1965   PetscLogObjectParent(mat,a->lvec);
1966   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1967   PetscLogObjectParent(mat,a->Mvctx);
1968   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1969   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1970   PetscLogObjectParent(mat,a->A);
1971   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1972   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1973   PetscLogObjectParent(mat,a->B);
1974   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1975   *newmat = mat;
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #include "petscsys.h"
1980 
1981 #undef __FUNCT__
1982 #define __FUNCT__ "MatLoad_MPIAIJ"
1983 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1984 {
1985   Mat            A;
1986   PetscScalar    *vals,*svals;
1987   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1988   MPI_Status     status;
1989   PetscErrorCode ierr;
1990   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1991   int            i,nz,j,rstart,rend,fd;
1992   int            header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1993   int            *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1994   int            cend,cstart,n;
1995 
1996   PetscFunctionBegin;
1997   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1998   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1999   if (!rank) {
2000     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2001     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2002     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2003     if (header[3] < 0) {
2004       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
2005     }
2006   }
2007 
2008   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2009   M = header[1]; N = header[2];
2010   /* determine ownership of all rows */
2011   m = M/size + ((M % size) > rank);
2012   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2013   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2014   rowners[0] = 0;
2015   for (i=2; i<=size; i++) {
2016     rowners[i] += rowners[i-1];
2017   }
2018   rstart = rowners[rank];
2019   rend   = rowners[rank+1];
2020 
2021   /* distribute row lengths to all processors */
2022   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
2023   offlens = ourlens + (rend-rstart);
2024   if (!rank) {
2025     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
2026     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2027     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2028     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2029     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2030     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2031   } else {
2032     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2033   }
2034 
2035   if (!rank) {
2036     /* calculate the number of nonzeros on each processor */
2037     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2038     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2039     for (i=0; i<size; i++) {
2040       for (j=rowners[i]; j< rowners[i+1]; j++) {
2041         procsnz[i] += rowlengths[j];
2042       }
2043     }
2044     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2045 
2046     /* determine max buffer needed and allocate it */
2047     maxnz = 0;
2048     for (i=0; i<size; i++) {
2049       maxnz = PetscMax(maxnz,procsnz[i]);
2050     }
2051     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2052 
2053     /* read in my part of the matrix column indices  */
2054     nz   = procsnz[0];
2055     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2056     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2057 
2058     /* read in everyone else's part and ship it off */
2059     for (i=1; i<size; i++) {
2060       nz   = procsnz[i];
2061       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2062       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2063     }
2064     ierr = PetscFree(cols);CHKERRQ(ierr);
2065   } else {
2066     /* determine buffer space needed for message */
2067     nz = 0;
2068     for (i=0; i<m; i++) {
2069       nz += ourlens[i];
2070     }
2071     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2072 
2073     /* receive message of column indices */
2074     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2075     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2076     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2077   }
2078 
2079   /* determine column ownership if matrix is not square */
2080   if (N != M) {
2081     n      = N/size + ((N % size) > rank);
2082     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2083     cstart = cend - n;
2084   } else {
2085     cstart = rstart;
2086     cend   = rend;
2087     n      = cend - cstart;
2088   }
2089 
2090   /* loop over local rows, determining number of off diagonal entries */
2091   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2092   jj = 0;
2093   for (i=0; i<m; i++) {
2094     for (j=0; j<ourlens[i]; j++) {
2095       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2096       jj++;
2097     }
2098   }
2099 
2100   /* create our matrix */
2101   for (i=0; i<m; i++) {
2102     ourlens[i] -= offlens[i];
2103   }
2104   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2105   ierr = MatSetType(A,type);CHKERRQ(ierr);
2106   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2107 
2108   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2109   for (i=0; i<m; i++) {
2110     ourlens[i] += offlens[i];
2111   }
2112 
2113   if (!rank) {
2114     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2115 
2116     /* read in my part of the matrix numerical values  */
2117     nz   = procsnz[0];
2118     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2119 
2120     /* insert into matrix */
2121     jj      = rstart;
2122     smycols = mycols;
2123     svals   = vals;
2124     for (i=0; i<m; i++) {
2125       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2126       smycols += ourlens[i];
2127       svals   += ourlens[i];
2128       jj++;
2129     }
2130 
2131     /* read in other processors and ship out */
2132     for (i=1; i<size; i++) {
2133       nz   = procsnz[i];
2134       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2135       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2136     }
2137     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2138   } else {
2139     /* receive numeric values */
2140     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2141 
2142     /* receive message of values */
2143     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2144     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2145     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2146 
2147     /* insert into matrix */
2148     jj      = rstart;
2149     smycols = mycols;
2150     svals   = vals;
2151     for (i=0; i<m; i++) {
2152       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2153       smycols += ourlens[i];
2154       svals   += ourlens[i];
2155       jj++;
2156     }
2157   }
2158   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2159   ierr = PetscFree(vals);CHKERRQ(ierr);
2160   ierr = PetscFree(mycols);CHKERRQ(ierr);
2161   ierr = PetscFree(rowners);CHKERRQ(ierr);
2162 
2163   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2164   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2165   *newmat = A;
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2171 /*
2172     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2173   on each process, and then the end result by concatenating those local pieces.
2174   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2175 */
2176 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2177 {
2178   PetscErrorCode ierr;
2179   PetscMPIInt    rank,size;
2180   int            i,m,n,rstart,row,rend,nz,*cwork,j;
2181   int            *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2182   Mat            *local,M,Mreuse;
2183   PetscScalar    *vwork,*aa;
2184   MPI_Comm       comm = mat->comm;
2185   Mat_SeqAIJ     *aij;
2186 
2187 
2188   PetscFunctionBegin;
2189   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2190   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2191 
2192   if (call ==  MAT_REUSE_MATRIX) {
2193     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2194     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2195     local = &Mreuse;
2196     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2197   } else {
2198     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2199     Mreuse = *local;
2200     ierr   = PetscFree(local);CHKERRQ(ierr);
2201   }
2202 
2203   /*
2204       m - number of local rows
2205       n - number of columns (same on all processors)
2206       rstart - first row in new global matrix generated
2207   */
2208   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2209   if (call == MAT_INITIAL_MATRIX) {
2210     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2211     ii  = aij->i;
2212     jj  = aij->j;
2213 
2214     /*
2215         Determine the number of non-zeros in the diagonal and off-diagonal
2216         portions of the matrix in order to do correct preallocation
2217     */
2218 
2219     /* first get start and end of "diagonal" columns */
2220     if (csize == PETSC_DECIDE) {
2221       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2222       if (mglobal == n) { /* square matrix */
2223         nlocal = m;
2224       } else {
2225         nlocal = n/size + ((n % size) > rank);
2226       }
2227     } else {
2228       nlocal = csize;
2229     }
2230     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2231     rstart = rend - nlocal;
2232     if (rank == size - 1 && rend != n) {
2233       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2234     }
2235 
2236     /* next, compute all the lengths */
2237     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2238     olens = dlens + m;
2239     for (i=0; i<m; i++) {
2240       jend = ii[i+1] - ii[i];
2241       olen = 0;
2242       dlen = 0;
2243       for (j=0; j<jend; j++) {
2244         if (*jj < rstart || *jj >= rend) olen++;
2245         else dlen++;
2246         jj++;
2247       }
2248       olens[i] = olen;
2249       dlens[i] = dlen;
2250     }
2251     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2252     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2253     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2254     ierr = PetscFree(dlens);CHKERRQ(ierr);
2255   } else {
2256     int ml,nl;
2257 
2258     M = *newmat;
2259     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2260     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2261     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2262     /*
2263          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2264        rather than the slower MatSetValues().
2265     */
2266     M->was_assembled = PETSC_TRUE;
2267     M->assembled     = PETSC_FALSE;
2268   }
2269   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2270   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2271   ii  = aij->i;
2272   jj  = aij->j;
2273   aa  = aij->a;
2274   for (i=0; i<m; i++) {
2275     row   = rstart + i;
2276     nz    = ii[i+1] - ii[i];
2277     cwork = jj;     jj += nz;
2278     vwork = aa;     aa += nz;
2279     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2280   }
2281 
2282   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2283   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2284   *newmat = M;
2285 
2286   /* save submatrix used in processor for next request */
2287   if (call ==  MAT_INITIAL_MATRIX) {
2288     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2289     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2290   }
2291 
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #undef __FUNCT__
2296 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2297 /*@C
2298    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2299    (the default parallel PETSc format).  For good matrix assembly performance
2300    the user should preallocate the matrix storage by setting the parameters
2301    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2302    performance can be increased by more than a factor of 50.
2303 
2304    Collective on MPI_Comm
2305 
2306    Input Parameters:
2307 +  A - the matrix
2308 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2309            (same value is used for all local rows)
2310 .  d_nnz - array containing the number of nonzeros in the various rows of the
2311            DIAGONAL portion of the local submatrix (possibly different for each row)
2312            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2313            The size of this array is equal to the number of local rows, i.e., 'm'.
2314            You must leave room for the diagonal entry even if it is zero.
2315 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2316            submatrix (same value is used for all local rows).
2317 -  o_nnz - array containing the number of nonzeros in the various rows of the
2318            OFF-DIAGONAL portion of the local submatrix (possibly different for
2319            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2320            structure. The size of this array is equal to the number
2321            of local rows, i.e., 'm'.
2322 
2323    The AIJ format (also called the Yale sparse matrix format or
2324    compressed row storage), is fully compatible with standard Fortran 77
2325    storage.  That is, the stored row and column indices can begin at
2326    either one (as in Fortran) or zero.  See the users manual for details.
2327 
2328    The user MUST specify either the local or global matrix dimensions
2329    (possibly both).
2330 
2331    The parallel matrix is partitioned such that the first m0 rows belong to
2332    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2333    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2334 
2335    The DIAGONAL portion of the local submatrix of a processor can be defined
2336    as the submatrix which is obtained by extracting the part corresponding
2337    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2338    first row that belongs to the processor, and r2 is the last row belonging
2339    to this processor. This is a square mxm matrix. The remaining portion
2340    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2341 
2342    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2343 
2344    By default, this format uses inodes (identical nodes) when possible.
2345    We search for consecutive rows with the same nonzero structure, thereby
2346    reusing matrix information to achieve increased efficiency.
2347 
2348    Options Database Keys:
2349 +  -mat_aij_no_inode  - Do not use inodes
2350 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2351 -  -mat_aij_oneindex - Internally use indexing starting at 1
2352         rather than 0.  Note that when calling MatSetValues(),
2353         the user still MUST index entries starting at 0!
2354 
2355    Example usage:
2356 
2357    Consider the following 8x8 matrix with 34 non-zero values, that is
2358    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2359    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2360    as follows:
2361 
2362 .vb
2363             1  2  0  |  0  3  0  |  0  4
2364     Proc0   0  5  6  |  7  0  0  |  8  0
2365             9  0 10  | 11  0  0  | 12  0
2366     -------------------------------------
2367            13  0 14  | 15 16 17  |  0  0
2368     Proc1   0 18  0  | 19 20 21  |  0  0
2369             0  0  0  | 22 23  0  | 24  0
2370     -------------------------------------
2371     Proc2  25 26 27  |  0  0 28  | 29  0
2372            30  0  0  | 31 32 33  |  0 34
2373 .ve
2374 
2375    This can be represented as a collection of submatrices as:
2376 
2377 .vb
2378       A B C
2379       D E F
2380       G H I
2381 .ve
2382 
2383    Where the submatrices A,B,C are owned by proc0, D,E,F are
2384    owned by proc1, G,H,I are owned by proc2.
2385 
2386    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2387    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2388    The 'M','N' parameters are 8,8, and have the same values on all procs.
2389 
2390    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2391    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2392    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2393    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2394    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2395    matrix, and [DF] as another SeqAIJ matrix.
2396 
2397    When d_nz, o_nz parameters are specified, d_nz storage elements are
2398    allocated for every row of the local diagonal submatrix, and o_nz
2399    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2400    One way to choose d_nz and o_nz is to use the max nonzeros per local
2401    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2402    In this case, the values of d_nz,o_nz are:
2403 .vb
2404      proc0: d_nz = 2, o_nz = 2
2405      proc1: d_nz = 3, o_nz = 2
2406      proc2: d_nz = 1, o_nz = 4
2407 .ve
2408    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2409    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2410    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2411    34 values.
2412 
2413    When d_nnz, o_nnz parameters are specified, the storage is specified
2414    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2415    In the above case the values for d_nnz,o_nnz are:
2416 .vb
2417      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2418      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2419      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2420 .ve
2421    Here the space allocated is the sum of all the above values, i.e., 34, and
2422    hence pre-allocation is perfect.
2423 
2424    Level: intermediate
2425 
2426 .keywords: matrix, aij, compressed row, sparse, parallel
2427 
2428 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2429 @*/
2430 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2431 {
2432   PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]);
2433 
2434   PetscFunctionBegin;
2435   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2436   if (f) {
2437     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2438   }
2439   PetscFunctionReturn(0);
2440 }
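
/*
   A minimal sketch of per-row preallocation, reusing the 8x8 example from the manual page
   above.  On proc1 (3 local rows of a global 8x8 matrix) the counts from that example are
   d_nnz = [3,3,2] and o_nnz = [2,1,1].

     Mat A;
     int d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
     ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/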
2441 
2442 #undef __FUNCT__
2443 #define __FUNCT__ "MatCreateMPIAIJ"
2444 /*@C
2445    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2446    (the default parallel PETSc format).  For good matrix assembly performance
2447    the user should preallocate the matrix storage by setting the parameters
2448    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2449    performance can be increased by more than a factor of 50.
2450 
2451    Collective on MPI_Comm
2452 
2453    Input Parameters:
2454 +  comm - MPI communicator
2455 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2456            This value should be the same as the local size used in creating the
2457            y vector for the matrix-vector product y = Ax.
2458 .  n - This value should be the same as the local size used in creating the
2459        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2460        calculated if N is given) For square matrices n is almost always m.
2461 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2462 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2463 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2464            (same value is used for all local rows)
2465 .  d_nnz - array containing the number of nonzeros in the various rows of the
2466            DIAGONAL portion of the local submatrix (possibly different for each row)
2467            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2468            The size of this array is equal to the number of local rows, i.e., 'm'.
2469            You must leave room for the diagonal entry even if it is zero.
2470 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2471            submatrix (same value is used for all local rows).
2472 -  o_nnz - array containing the number of nonzeros in the various rows of the
2473            OFF-DIAGONAL portion of the local submatrix (possibly different for
2474            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2475            structure. The size of this array is equal to the number
2476            of local rows, i.e., 'm'.
2477 
2478    Output Parameter:
2479 .  A - the matrix
2480 
2481    Notes:
2482    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2483    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2484    storage requirements for this matrix.
2485 
2486    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2487    processor then it must be used on all processors that share the object for
2488    that argument.
2489 
2490    The AIJ format (also called the Yale sparse matrix format or
2491    compressed row storage) is fully compatible with standard Fortran 77
2492    storage.  That is, the stored row and column indices can begin at
2493    either one (as in Fortran) or zero.  See the users manual for details.
2494 
2495    The user MUST specify either the local or global matrix dimensions
2496    (possibly both).
2497 
2498    The parallel matrix is partitioned such that the first m0 rows belong to
2499    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2500    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2501 
2502    The DIAGONAL portion of the local submatrix of a processor can be defined
2503    as the submatrix which is obtained by extracting the part corresponding
2504    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2505    first row that belongs to the processor, and r2 is the last row belonging
2506    to this processor. This is a square mxm matrix. The remaining portion
2507    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2508 
2509    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2510 
2511    When calling this routine with a single process communicator, a matrix of
2512    type MATSEQAIJ is returned.  If a matrix of type MATMPIAIJ is desired for this
2513    type of communicator, use the construction mechanism:
2514      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2515 
2516    By default, this format uses inodes (identical nodes) when possible.
2517    We search for consecutive rows with the same nonzero structure, thereby
2518    reusing matrix information to achieve increased efficiency.
2519 
2520    Options Database Keys:
2521 +  -mat_aij_no_inode  - Do not use inodes
2522 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2523 -  -mat_aij_oneindex - Internally use indexing starting at 1
2524         rather than 0.  Note that when calling MatSetValues(),
2525         the user still MUST index entries starting at 0!
2526 
2527 
2528    Example usage:
2529 
2530    Consider the following 8x8 matrix with 34 non-zero values, that is
2531    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2532    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2533    as follows:
2534 
2535 .vb
2536             1  2  0  |  0  3  0  |  0  4
2537     Proc0   0  5  6  |  7  0  0  |  8  0
2538             9  0 10  | 11  0  0  | 12  0
2539     -------------------------------------
2540            13  0 14  | 15 16 17  |  0  0
2541     Proc1   0 18  0  | 19 20 21  |  0  0
2542             0  0  0  | 22 23  0  | 24  0
2543     -------------------------------------
2544     Proc2  25 26 27  |  0  0 28  | 29  0
2545            30  0  0  | 31 32 33  |  0 34
2546 .ve
2547 
2548    This can be represented as a collection of submatrices as:
2549 
2550 .vb
2551       A B C
2552       D E F
2553       G H I
2554 .ve
2555 
2556    Where the submatrices A,B,C are owned by proc0, D,E,F are
2557    owned by proc1, G,H,I are owned by proc2.
2558 
2559    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2560    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2561    The 'M','N' parameters are 8,8, and have the same values on all procs.
2562 
2563    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2564    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2565    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2566    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2567    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2568    matrix, and [DF] as another SeqAIJ matrix.
2569 
2570    When d_nz, o_nz parameters are specified, d_nz storage elements are
2571    allocated for every row of the local diagonal submatrix, and o_nz
2572    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2573    One way to choose d_nz and o_nz is to use the max nonzeros per local
2574    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2575    In this case, the values of d_nz,o_nz are:
2576 .vb
2577      proc0: d_nz = 2, o_nz = 2
2578      proc1: d_nz = 3, o_nz = 2
2579      proc2: d_nz = 1, o_nz = 4
2580 .ve
2581    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2582    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2583    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2584    34 values.
2585 
2586    When d_nnz, o_nnz parameters are specified, the storage is specified
2587    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2588    In the above case the values for d_nnz,o_nnz are:
2589 .vb
2590      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2591      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2592      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2593 .ve
2594    Here the space allocated is the sum of all the above values, i.e., 34, and
2595    hence pre-allocation is perfect.
2596 
2597    Level: intermediate
2598 
2599 .keywords: matrix, aij, compressed row, sparse, parallel
2600 
2601 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2602 @*/
2603 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2604 {
2605   PetscErrorCode ierr;
2606   int size;
2607 
2608   PetscFunctionBegin;
2609   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2610   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2611   if (size > 1) {
2612     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2613     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2614   } else {
2615     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2616     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2617   }
2618   PetscFunctionReturn(0);
2619 }
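
/*
   A minimal sketch of the single-call equivalent of the create/set-type/preallocate sequence
   in the body above, with the per-row counts proc2 would use in the 8x8 example of the manual
   page (d_nnz = [1,1], o_nnz = [4,4]; 2 local rows and columns).

     Mat A;
     int d_nnz[] = {1,1},o_nnz[] = {4,4};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,2,2,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
*/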
2620 
2621 #undef __FUNCT__
2622 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
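/*
    Gives access to the pieces of an MPIAIJ matrix: the local diagonal block (Ad), the local
  off-diagonal block (Ao), and garray (colmap), which maps the compressed local columns of Ao
  to global column numbers.
*/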
2623 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2624 {
2625   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2626   PetscFunctionBegin;
2627   *Ad     = a->A;
2628   *Ao     = a->B;
2629   *colmap = a->garray;
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 #undef __FUNCT__
2634 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2635 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2636 {
2637   PetscErrorCode ierr;
2638   int        i;
2639   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2640 
2641   PetscFunctionBegin;
2642   if (coloring->ctype == IS_COLORING_LOCAL) {
2643     ISColoringValue *allcolors,*colors;
2644     ISColoring      ocoloring;
2645 
2646     /* set coloring for diagonal portion */
2647     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2648 
2649     /* set coloring for off-diagonal portion */
2650     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2651     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2652     for (i=0; i<a->B->n; i++) {
2653       colors[i] = allcolors[a->garray[i]];
2654     }
2655     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2656     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2657     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2658     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2659   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2660     ISColoringValue *colors;
2661     int             *larray;
2662     ISColoring      ocoloring;
2663 
2664     /* set coloring for diagonal portion */
2665     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2666     for (i=0; i<a->A->n; i++) {
2667       larray[i] = i + a->cstart;
2668     }
2669     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2670     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2671     for (i=0; i<a->A->n; i++) {
2672       colors[i] = coloring->colors[larray[i]];
2673     }
2674     ierr = PetscFree(larray);CHKERRQ(ierr);
2675     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2676     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2677     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2678 
2679     /* set coloring for off-diagonal portion */
2680     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2681     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2682     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2683     for (i=0; i<a->B->n; i++) {
2684       colors[i] = coloring->colors[larray[i]];
2685     }
2686     ierr = PetscFree(larray);CHKERRQ(ierr);
2687     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2688     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2689     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2690   } else {
2691     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2692   }
2693 
2694   PetscFunctionReturn(0);
2695 }
2696 
2697 #undef __FUNCT__
2698 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2699 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2700 {
2701   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2702   PetscErrorCode ierr;
2703 
2704   PetscFunctionBegin;
2705   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2706   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2707   PetscFunctionReturn(0);
2708 }
2709 
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2712 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2713 {
2714   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2715   PetscErrorCode ierr;
2716 
2717   PetscFunctionBegin;
2718   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2719   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 #undef __FUNCT__
2724 #define __FUNCT__ "MatMerge"
2725 /*@C
2726       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2727                  matrices from each processor
2728 
2729     Collective on MPI_Comm
2730 
2731    Input Parameters:
2732 +    comm - the communicator the parallel matrix will live on
2733 .    inmat - the input sequential matrix on each processor
2734 .    n - number of local columns (or PETSC_DECIDE)
2735 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2736 
2737    Output Parameter:
2738 .    outmat - the parallel matrix generated
2739 
2740     Level: advanced
2741 
2742    Notes: The number of columns of the matrix on EACH processor MUST be the same.
2743 
2744 @*/
2745 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2746 {
2747   PetscErrorCode ierr;
2748   int               m,N,i,rstart,nnz,I,*dnz,*onz;
2749   const int         *indx;
2750   const PetscScalar *values;
2751   PetscMap          columnmap,rowmap;
2752 
2753   PetscFunctionBegin;
2754 
2755   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2756   if (scall == MAT_INITIAL_MATRIX){
2757     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2758     if (n == PETSC_DECIDE){
2759       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2760       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2761       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2762       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2763       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2764     }
2765 
2766     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2767     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2768     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2769     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2770     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2771 
2772     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2773     for (i=0;i<m;i++) {
2774       ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2775       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2776       ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2777     }
2778     /* This routine will ONLY return an MPIAIJ type matrix */
2779     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2780     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2781     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2782     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2783 
2784   } else if (scall == MAT_REUSE_MATRIX){
2785     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2786   } else {
2787     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2788   }
2789 
2790   for (i=0;i<m;i++) {
2791     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2792     I    = i + rstart;
2793     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2794     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2795   }
2796   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2797   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2798   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2799 
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatFileSplit"
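/*
   MatFileSplit - Writes each process's local rows of A as a sequential matrix
   to its own binary file named <outfile>.<rank>, one file per process.
*/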
2805 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2806 {
2807   PetscErrorCode    ierr;
2808   PetscMPIInt       rank;
2809   int               m,N,i,rstart,nnz;
2810   size_t            len;
2811   const int         *indx;
2812   PetscViewer       out;
2813   char              *name;
2814   Mat               B;
2815   const PetscScalar *values;
2816 
2817   PetscFunctionBegin;
2818 
2819   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2820   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2821   /* Should this be the type of the diagonal block of A? */
2822   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2823   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2824   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2825   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2826   for (i=0;i<m;i++) {
2827     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2828     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2829     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2830   }
2831   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2832   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2833 
2834   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2835   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2836   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* leave room for ".<rank>" even with many processes */
2837   sprintf(name,"%s.%d",outfile,rank);
2838   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2839   ierr = PetscFree(name);CHKERRQ(ierr);
2840   ierr = MatView(B,out);CHKERRQ(ierr);
2841   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2842   ierr = MatDestroy(B);CHKERRQ(ierr);
2843   PetscFunctionReturn(0);
2844 }
2845 
2846 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2847 #undef __FUNCT__
2848 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2849 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2850 {
2851   PetscErrorCode ierr;
2852   Mat_Merge_SeqsToMPI *merge=(Mat_Merge_SeqsToMPI*)A->spptr;
2853 
2854   PetscFunctionBegin;
2855   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2856   ierr = PetscFree(merge->abuf_s);CHKERRQ(ierr);
2857   ierr = PetscFree(merge->len_sra);CHKERRQ(ierr);
2858   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2859   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2860   ierr = PetscFree(merge->ijbuf_r);CHKERRQ(ierr);
2861   ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2862   ierr = MatDestroy(merge->C_seq);CHKERRQ(ierr);
2863   ierr = PetscFree(merge);CHKERRQ(ierr);
2864 
2865   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2866   PetscFunctionReturn(0);
2867 }
2868 
2869 #include "src/mat/utils/freespace.h"
2870 #include "petscbt.h"
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatMerge_SeqsToMPI"
2873 /*@C
2874       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2875                  matrices from each processor
2876 
2877     Collective on MPI_Comm
2878 
2879    Input Parameters:
2880 +    comm - the communicator the parallel matrix will live on
2881 .    seqmat - the input sequential matrix on each processor
2882 .    m - number of local rows (or PETSC_DECIDE)
2883 .    n - number of local columns (or PETSC_DECIDE)
2884 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2885 
2886    Output Parameter:
2887 .    mpimat - the parallel matrix generated
2888 
2889     Level: advanced
2890 
2891    Notes: The dimensions of the sequential matrix in each processor
2892       MUST be the same.
2893 
2894 @*/
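/*
   Usage sketch (editorial addition; variable names are illustrative): every process
   supplies a sequential matrix with the same dimensions and the entries are summed
   into a single parallel MPIAIJ matrix.  The input matrix is kept in the merge
   structure attached to the result and is destroyed with it, so the result may be
   refilled later with MAT_REUSE_MATRIX.

      Mat seqmat,C;
      ... assemble seqmat (same dimensions on every process) ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ... change the numerical values of seqmat (same nonzero pattern) ...
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/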
2895 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
2896 {
2897   PetscErrorCode    ierr;
2898   Mat               B_seq,B_mpi;
2899   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)seqmat->data;
2900   PetscMPIInt       size,rank;
2901   int               M=seqmat->m,N=seqmat->n,i,j,*owners,*ai=a->i,*aj=a->j;
2902   int               tag,taga,len,len_a = 0,*len_s,*len_sa,proc,*len_r,*len_ra;
2903   int               **ijbuf_r,*ijbuf_s,*nnz_ptr,k,anzi,*bj_i,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj;
2904   MPI_Request       *s_waits,*r_waits,*s_waitsa,*r_waitsa;
2905   MPI_Status        *status;
2906   MatScalar         *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i;
2907   FreeSpaceList     free_space=PETSC_NULL,current_space=PETSC_NULL;
2908   PetscBT           lnkbt;
2909   Mat_Merge_SeqsToMPI *merge;
2910 
2911   PetscFunctionBegin;
2912   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2913   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2914   /* ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] nnz(C): %d\n",rank,ai[M]); */
2915   if (scall == MAT_INITIAL_MATRIX){
2916     ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
2917   } else if (scall == MAT_REUSE_MATRIX){
2918     B_mpi = *mpimat;
2919     merge = (Mat_Merge_SeqsToMPI*)B_mpi->spptr;
2920     bi      = merge->bi;
2921     bj      = merge->bj;
2922     ijbuf_r = merge->ijbuf_r;
2923   } else {
2924     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2925   }
2926   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2927 
2928   /* determine the number of messages to send, their lengths */
2929   /*---------------------------------------------------------*/
2930   if (scall == MAT_INITIAL_MATRIX){
2931     ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
2932     if (m == PETSC_DECIDE) {
2933       ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
2934     } else {
2935       ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
2936     }
2937     ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
2938     ierr = PetscMalloc(size*sizeof(int),&len_s);CHKERRQ(ierr);
2939     ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr);
2940   }
2941   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);}
2942   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2943 
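  /* merge->len_sra holds the lengths of the value messages: the first 'size' entries
     (len_sa) are the send lengths, the next 'size' entries (len_ra) the receive lengths */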
2944   len_sa  = merge->len_sra;
2945   len_ra = merge->len_sra + size;
2946 
2947   if (scall == MAT_INITIAL_MATRIX){
2948     merge->nsend = 0;
2949     len = len_a = 0;
2950     for (proc=0; proc<size; proc++){
2951       len_s[proc] = len_sa[proc] = 0;
2952       if (proc == rank) continue;
2953       for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
2954         len_sa[proc] += ai[i+1] - ai[i]; /* num of cols sent to [proc] */
2955       }
2956       if (len_sa[proc]) {
2957         merge->nsend++;
2958         len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc];
2959         if (len < len_s[proc]) len = len_s[proc];
2960         if (len_a < len_sa[proc]) len_a = len_sa[proc];
2961       }
2962     }
2963 
2964     /* determine the number and length of messages to receive */
2965     /*--------------------------------------------------------*/
2966     ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
2967     ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr);
2968     /*
2969     ierr = PetscPrintf(PETSC_COMM_SELF," [%d] nsend: %d, nrecv: %d\n",rank,merge->nsend,merge->nrecv);
2970     for (i=0; i<merge->nrecv; i++){
2971       ierr = PetscPrintf(PETSC_COMM_SELF," [%d] expects recv len_r=%d from [%d]\n",rank,len_r[i],merge->id_r[i]);
2972     }
2973     */
2974 
2975     /* post the Irecvs corresponding to these messages */
2976     /*-------------------------------------------------*/
2977     /* get new tag to keep the communication clean */
2978     ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr);
2979     ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr);
2980 
2981     /* post the sends */
2982     /*----------------*/
2983     ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2984     ierr = PetscMalloc((len+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr);
2985     k = 0;
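    /* the ij message for [proc] is laid out as (owners[proc+1]-owners[proc]) running
       row lengths followed by the column indices of those rows */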
2986     for (proc=0; proc<size; proc++){
2987       if (!len_s[proc]) continue;
2988       /* form outgoing ij data to be sent to [proc] */
2989       nnz_ptr = ijbuf_s + owners[proc+1] - owners[proc];
2990       for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
2991         anzi = ai[i+1] - ai[i];
2992         if (i==owners[proc]){
2993           ijbuf_s[i-owners[proc]] = anzi;
2994         } else {
2995           ijbuf_s[i-owners[proc]] = ijbuf_s[i-owners[proc]-1]+ anzi;
2996         }
2997         for (j=0; j <anzi; j++){
2998           *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++;
2999         }
3000       }
3001       ierr = MPI_Isend(ijbuf_s,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr);
3002       k++;
3003     }
3004 
3005     /* wait until the receives and sends of the ij-structure are complete, then free the buffers */
3006     /*----------------------------------------------------------------------*/
3007     ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
3008     ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
3009 
3010     ierr = PetscFree(ijbuf_s);CHKERRQ(ierr);
3011     ierr = PetscFree(s_waits);CHKERRQ(ierr);
3012     ierr = PetscFree(r_waits);CHKERRQ(ierr);
3013   }
3014 
3015   /* send and recv matrix values */
3016   /*-----------------------------*/
3017   if (scall == MAT_INITIAL_MATRIX){
3018     ierr = PetscMalloc((len_a+1)*sizeof(MatScalar),&merge->abuf_s);CHKERRQ(ierr);
3019     for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* length of seqmat->a to be received from a proc */
3020   }
3021   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
3022   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr);
3023 
3024   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr);
3025   k = 0;
3026   for (proc=0; proc<size; proc++){
3027     if (!len_sa[proc]) continue;
3028     /* form outgoing mat value data to be sent to [proc] */
3029     abuf_i = merge->abuf_s;
3030     for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
3031       anzi = ai[i+1] - ai[i];
3032       for (j=0; j <anzi; j++){
3033         *abuf_i = *(aa+ai[i]+j); abuf_i++;
3034       }
3035     }
3036     ierr = MPI_Isend(merge->abuf_s,len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr);
3037     k++;
3038   }
3039 
3040   ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr);
3041   ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr);
3042   ierr = PetscFree(status);CHKERRQ(ierr);
3043 
3044   ierr = PetscFree(s_waitsa);CHKERRQ(ierr);
3045   ierr = PetscFree(r_waitsa);CHKERRQ(ierr);
3046 
3047   /* create seq matrix B_seq in each processor */
3048   /*-------------------------------------------*/
3049   if (scall == MAT_INITIAL_MATRIX){
3050     ierr = PetscFree(len_s);CHKERRQ(ierr);
3051     /* allocate bi array and free space for accumulating nonzero column info */
3052     ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr);
3053     bi[0] = 0;
3054 
3055     /* create and initialize a linked list */
3056     nlnk = N+1;
3057     ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3058 
3059     /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */
3060     len = 0;
3061     for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i];
3062     ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr);
3063     current_space = free_space;
3064 
3065     /* determine symbolic info for each row of B_seq */
3066     for (i=0;i<m;i++) {
3067       bnzi   = 0;
3068       /* add local non-zero cols of this proc's seqmat into lnk */
3069       arow   = owners[rank] + i;
3070       anzi   = ai[arow+1] - ai[arow];
3071       aj     = a->j + ai[arow];
3072       ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3073       bnzi += nlnk;
3074       /* add received col data into lnk */
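      /* ijbuf_r[k] holds m running row lengths followed by the column indices, so the
         indices of row i start at offset m plus the running length of row i-1 */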
3075       for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3076         /* i-th row */
3077         if (i == 0){
3078           anzi = *(ijbuf_r[k]+i);
3079           aj   = ijbuf_r[k] + m;
3080         } else {
3081           anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3082           aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3083         }
3084         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3085         bnzi += nlnk;
3086       }
3087 
3088       /* if free space is not available, make more free space */
3089       if (current_space->local_remaining<bnzi) {
3090         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3091         nspacedouble++;
3092       }
3093       /* copy data into free space, then initialize lnk */
3094       ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3095       current_space->array           += bnzi;
3096       current_space->local_used      += bnzi;
3097       current_space->local_remaining -= bnzi;
3098 
3099       bi[i+1] = bi[i] + bnzi;
3100     }
3101     ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr);
3102     ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3103 
3104     ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3105     ierr = PetscFree(len_r);CHKERRQ(ierr);
3106 
3107     /* allocate space for ba */
3108     ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3109     ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3110 
3111     /* put together B_seq */
3112     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr);
3113     ierr = PetscFree(ba);CHKERRQ(ierr); /* bi and bj are saved for reuse */
3114 
3115     /* create symbolic parallel matrix B_mpi by concatenating B_seq */
3116     /*--------------------------------------------------------------*/
3117     ierr = MatMerge(comm,B_seq,n,scall,&B_mpi);CHKERRQ(ierr);
3118   } /* end of if (scall == MAT_INITIAL_MATRIX) */
3119 
3120   /* insert mat values of B_mpi */
3121   /*----------------------------*/
3122   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3123 
3124   /* set values of ba */
3125   for (i=0; i<m; i++) {
3126     arow = owners[rank] + i;
3127     bj_i = bj+bi[i];  /* col indices of the i-th row of B_seq */
3128     bnzi = bi[i+1] - bi[i];
3129     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3130 
3131     /* add local non-zero vals of this proc's seqmat into ba */
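    /* the column indices aj of the local row form a sorted subset of bj_i (the merged
       row), so a single forward pass matches each local value to its slot in ba_i */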
3132     anzi = ai[arow+1] - ai[arow];
3133     aj   = a->j + ai[arow];
3134     aa   = a->a + ai[arow];
3135     nextaj = 0;
3136     for (j=0; nextaj<anzi; j++){
3137       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3138         ba_i[j] += aa[nextaj++];
3139       }
3140     }
3141 
3142     /* add received vals into ba */
3143     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3144       /* i-th row */
3145       if (i == 0){
3146         anzi = *(ijbuf_r[k]+i);
3147         aj   = ijbuf_r[k] + m;
3148         aa   = abuf_r[k];
3149       } else {
3150         anzi = *(ijbuf_r[k]+i) - *(ijbuf_r[k]+i-1);
3151         aj   = ijbuf_r[k]+m + *(ijbuf_r[k]+i-1);
3152         aa   = abuf_r[k] + *(ijbuf_r[k]+i-1);
3153       }
3154       nextaj = 0;
3155       for (j=0; nextaj<anzi; j++){
3156         if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3157           ba_i[j] += aa[nextaj++];
3158         }
3159       }
3160     }
3161 
3162     ierr = MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3163   }
3164   ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3165   ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3166 
3167   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3168   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3169 
3170   /* attach the supporting struct to B_mpi for reuse */
3171   /*-------------------------------------------------*/
3172   if (scall == MAT_INITIAL_MATRIX){
3173     B_mpi->spptr         = (void*)merge;
3174     B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3175     merge->bi            = bi;
3176     merge->bj            = bj;
3177     merge->ijbuf_r       = ijbuf_r;
3178     merge->C_seq         = seqmat;
3179 
3180     *mpimat = B_mpi;
3181   }
3182   PetscFunctionReturn(0);
3183 }
3184 
3185 #undef __FUNCT__
3186 #define __FUNCT__ "MatGetLocalMat"
3187 /*@C
3188      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of the parallel matrix A
3189 
3190     Collective on Mat
3191 
3192    Input Parameters:
3193 +    A - the matrix
3194 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3195 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3196 
3197    Output Parameter:
3198 .    A_loc - the local sequential matrix generated
3199 
3200     Level: developer
3201 
3202 @*/
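/*
   Usage sketch (editorial addition): passing PETSC_NULL for the index sets lets the
   routine build them itself; the resulting sequential matrix holds this process's
   rows of A.

      Mat Aloc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
      ... use Aloc; later calls may pass MAT_REUSE_MATRIX with the same Aloc ...
      ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/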
3203 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3204 {
3205   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3206   PetscErrorCode    ierr;
3207   int               *idx,i,start,end,ncols,nzA,nzB,*cmap,imark;
3208   IS                isrowa,iscola;
3209   Mat               *aloc;
3210 
3211   PetscFunctionBegin;
3212   if (!row){
3213     start = a->rstart; end = a->rend;
3214     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3215   } else {
3216     isrowa = *row;
3217   }
3218   if (!col){
3219     start = a->cstart;
3220     cmap  = a->garray;
3221     nzA   = a->A->n;
3222     nzB   = a->B->n;
3223     ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
3224     ncols = 0;
3225     for (i=0; i<nzB; i++) {
3226       if (cmap[i] < start) idx[ncols++] = cmap[i];
3227       else break;
3228     }
3229     imark = i;
3230     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3231     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3232     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3233     ierr = PetscFree(idx);CHKERRQ(ierr);
3234   } else {
3235     iscola = *col;
3236   }
3237   if (scall != MAT_INITIAL_MATRIX){
3238     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3239     aloc[0] = *A_loc;
3240   }
3241   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3242   *A_loc = aloc[0];
3243   ierr = PetscFree(aloc);CHKERRQ(ierr);
3244   if (!row){
3245     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3246   }
3247   if (!col){
3248     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3249   }
3250   PetscFunctionReturn(0);
3251 }
3252 
3253 #undef __FUNCT__
3254 #define __FUNCT__ "MatGetBrowsOfAcols"
3255 /*@C
3256      MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
3257 
3258     Collective on Mat
3259 
3260    Input Parameters:
3261 +    A,B - the matrices
3262 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3263 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3264 
3265    Output Parameter:
3266 +    rowb, colb - index sets of rows and columns of B to extract
3267 .    brstart - row index of B_seq at which the next B->m rows are taken from B's own local rows
3268 -    B_seq - the sequential matrix generated
3269 
3270     Level: developer
3271 
3272 @*/
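/*
   Usage sketch (editorial addition; variable names are illustrative): keep the
   returned index sets so a later call with MAT_REUSE_MATRIX can refill B_seq
   without rebuilding them.

      Mat      Bseq;
      IS       rowb,colb;
      PetscInt brstart;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
      ... later, with unchanged nonzero structure ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
*/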
3273 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3274 {
3275   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3276   PetscErrorCode    ierr;
3277   int               *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3278   IS                isrowb,iscolb;
3279   Mat               *bseq;
3280 
3281   PetscFunctionBegin;
3282   if (a->cstart != b->rstart || a->cend != b->rend){
3283     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3284   }
3285 
3286   if (scall == MAT_INITIAL_MATRIX){
3287     start = a->cstart;
3288     cmap  = a->garray;
3289     nzA   = a->A->n;
3290     nzB   = a->B->n;
3291     ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
3292     ncols = 0;
3293     for (i=0; i<nzB; i++) {
3294       if (cmap[i] < start) idx[ncols++] = cmap[i];
3295       else break;
3296     }
3297     imark = i;
3298     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3299     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3300     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3301     ierr = PetscFree(idx);CHKERRQ(ierr);
3302     *brstart = imark;
3303     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3304   } else {
3305     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3306     isrowb = *rowb; iscolb = *colb;
3307     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3308     bseq[0] = *B_seq;
3309   }
3310   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3311   *B_seq = bseq[0];
3312   ierr = PetscFree(bseq);CHKERRQ(ierr);
3313   if (!rowb){
3314     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3315   } else {
3316     *rowb = isrowb;
3317   }
3318   if (!colb){
3319     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3320   } else {
3321     *colb = iscolb;
3322   }
3323 
3324   PetscFunctionReturn(0);
3325 }
3326