/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f8a127874a1d4e52756bfe365576a2cc20151c52) */

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable
  (at a slightly higher hash-table lookup cost); without it, it is not
  scalable (each processor stores an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  int            n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
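/*
   Illustrative sketch (not part of PETSc): how the colmap built above is
   typically consumed.  Both branches store indices shifted by +1 so that a
   lookup result of 0 can mean "column not present"; callers subtract 1
   afterwards, as MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ() below do.

     int gcol = ...;     global column number
     int lcol;           local column index in the off-diagonal block B
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;             lcol is now -1 if gcol does not occur in B
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/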

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                                                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                                                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}
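/*
   Layout note (illustrative, summarizing the reallocation above): when a row
   overflows, one PetscMalloc() provides storage for all three CSR arrays,
   carved up by pointer arithmetic:

     new_a : new_nz PetscScalars  (the nonzero values)
     new_j : new_nz ints          (the column indices), starting at new_a + new_nz
     new_i : am+1   ints          (the row offsets),    starting at new_j + new_nz

   Hence singlemalloc is set to PETSC_TRUE afterwards: a single PetscFree(a->a)
   releases all three arrays on the next enlargement or at destroy time.
*/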

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int         new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                                                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                                                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar    *ba = b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  PetscInt       nonew = a->nonew,shift=0;
  PetscScalar    *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
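/*
   Usage sketch (illustrative only; the preallocation figures 5 and 2 are
   made-up assumptions, not recommendations): values may be set for any
   global row; rows owned by another process are stashed by the routine
   above and communicated during assembly, which is why the
   MatAssemblyBegin/End pair must follow.

     Mat         M;
     PetscScalar one = 1.0;
     int         row = 42;    any global row index, even an off-process one

     ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,100,100,&M);CHKERRQ(ierr);
     ierr = MatSetType(M,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(M,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatSetValues(M,1,&row,1,&row,&one,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/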

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  int            i,j,rstart = aij->rstart,rend = aij->rend;
  int            cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  int            nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
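/*
   Worked note on the MPI_BOR reduction above (illustrative, relying only on
   the InsertMode values NOT_SET_VALUES=0, INSERT_VALUES=1, ADD_VALUES=2):
   OR-ing the modes of all ranks yields

     all unset                  -> 0
     some insert, rest unset    -> 1          (consistent)
     some add, rest unset       -> 2          (consistent)
     a mix of insert and add    -> 1|2 == 3   (the error detected above)

   Ranks that set no values inherit the common mode from the reduction,
   which is why mat->insertmode is overwritten afterwards.
*/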

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  int            i,j,rstart,ncols,n,flg;
  int            *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
      /* reaccess b because aij->B was changed */
      b    = (Mat_SeqAIJ *)aij->B->data;
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  int            i,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart = l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, triggering many mallocs and slowing
     things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
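/*
   Usage sketch (illustrative only): rows are given by global index in an IS;
   each rank may list any global rows, and the routine above routes them to
   their owning ranks before zeroing.  Passing a non-NULL diag places that
   value on the diagonal of each zeroed row.

     IS          rows;
     int         myrows[2] = {0,7};    hypothetical global row numbers
     PetscScalar one = 1.0;

     ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,myrows,&rows);CHKERRQ(ierr);
     ierr = MatZeroRows(M,rows,&one);CHKERRQ(ierr);    M assembled beforehand
     ierr = ISDestroy(rows);CHKERRQ(ierr);
*/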

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  int            nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
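/*
   Worked note (illustrative): with the usual MPIAIJ splitting of the local
   rows into a diagonal block A_d and an off-diagonal block B_o, the product
   above is computed as

     y = A_d * x_local + B_o * x_ghost

   The scatter filling the ghost values a->lvec is started first, the purely
   local product A_d*x is done while the messages are in flight, and only
   then does VecScatterEnd() block, overlapping communication with
   computation.
*/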

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for this          */
  /* implementation but may not always hold                              */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
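/*
   Worked note (illustrative): the transpose product reverses the data flow
   of MatMult.  Each rank first forms B_o^T * x into the ghost vector, then
   scatters those partial sums back to their owning ranks with ADD_VALUES,
   so that

     y = A_d^T * x_local + (contributions of B_o^T * x from neighbors)

   accumulates correctly; the local A_d^T * x product again overlaps the
   reverse communication.
*/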

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  int            M,N,first,last,*notme,ntids,i;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&ntids);CHKERRQ(ierr);
  if (ntids==1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(int),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
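/*
   Usage sketch (illustrative only): this test is registered as the composed
   function "MatIsTranspose_C" (see MatDestroy_MPIAIJ below), so users reach
   it through the generic interface.

     PetscTruth flg;
     ierr = MatIsTranspose(A,B,1.e-10,&flg);CHKERRQ(ierr);
     flg is PETSC_TRUE when B equals A^T to the given tolerance
*/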

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this          */
  /* implementation but may not always hold                              */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
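/*
   File layout produced above (summarizing the writes; the field names are
   descriptive, not PETSc identifiers):

     header[0]   MAT_FILE_COOKIE
     header[1]   M              global number of rows
     header[2]   N              global number of columns
     header[3]   nnz            global number of nonzeros (MPI_SUM reduction)
     M ints                     nonzeros per row, in global row order
     nnz ints                   column indices, rows concatenated, each row
                                sorted by global column (B-left, A, B-right)
     nnz scalars                the nonzero values, in the same order

   Process 0 alone writes to the file; every other rank ships its rows to it
   with plain MPI_Send/MPI_Recv.
*/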

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
                                                  rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
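/*
   Worked note (illustrative): each outer iteration above freezes the ghost
   values and sweeps only the diagonal block.  Writing the local rows as
   [A_d | B_o], the system relaxed on each rank is

     A_d * x_local  against  bb1 = bb - B_o * x_ghost

   implemented by scaling the ghost vector by -1 and folding it into the
   right-hand side with a single multadd.  This amounts to block Jacobi
   between ranks combined with SOR within each rank, which is why the flag
   names say SOR_LOCAL_*.
*/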

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->n;

  PetscFunctionReturn(0);
}
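/*
   Usage sketch (illustrative only): the three MatInfoType values select how
   the per-rank numbers gathered above are combined.

     MatInfo info;
     ierr = MatGetInfo(M,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     total nonzeros across all ranks is now (int)info.nz_used;
     MAT_LOCAL reports this rank only, MAT_GLOBAL_MAX the per-rank maximum.
*/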

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  int            i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int            nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int            *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
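/*
   Usage sketch (illustrative only): rows must be owned by the calling rank,
   and every MatGetRow() must be matched by a MatRestoreRow() before the next
   row is requested (note the getrowactive guard above).

     int         rstart,rend,row,ncols,*cols;
     PetscScalar *vals;

     ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(M,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... inspect the ncols entries of cols/vals, sorted by global column ...
       ierr = MatRestoreRow(M,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/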

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  int            i,j,cstart = aij->cstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatTranspose_MPIAIJ"
1381 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1382 {
1383   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1384   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1385   PetscErrorCode ierr;
1386   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1387   Mat          B;
1388   PetscScalar  *array;
1389 
1390   PetscFunctionBegin;
1391   if (!matout && M != N) {
1392     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1393   }
1394 
1395   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1396   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1397   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1398 
1399   /* copy over the A part */
1400   Aloc = (Mat_SeqAIJ*)a->A->data;
1401   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1402   row = a->rstart;
1403   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1404   for (i=0; i<m; i++) {
1405     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1406     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1407   }
1408   aj = Aloc->j;
1409   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1410 
1411   /* copy over the B part */
1412   Aloc = (Mat_SeqAIJ*)a->B->data;
1413   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1414   row  = a->rstart;
1415   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1416   ct   = cols;
1417   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1418   for (i=0; i<m; i++) {
1419     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1420     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1421   }
1422   ierr = PetscFree(ct);CHKERRQ(ierr);
1423   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1424   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1425   if (matout) {
1426     *matout = B;
1427   } else {
1428     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1429   }
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1435 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1436 {
1437   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1438   Mat        a = aij->A,b = aij->B;
1439   PetscErrorCode ierr;
1440   int        s1,s2,s3;
1441 
1442   PetscFunctionBegin;
1443   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1444   if (rr) {
1445     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1446     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1447     /* Overlap communication with computation. */
1448     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1449   }
1450   if (ll) {
1451     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1452     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1453     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1454   }
1455   /* scale the diagonal block */
1456   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1457 
1458   if (rr) {
1459     /* Do a scatter end and then right scale the off-diagonal block */
1460     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1461     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1462   }
1463 
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1470 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1471 {
1472   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1473   PetscErrorCode ierr;
1474 
1475   PetscFunctionBegin;
1476   if (!a->rank) {
1477     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1478   }
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1484 PetscErrorCode MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1485 {
1486   PetscFunctionBegin;
1487   *bs = 1;
1488   PetscFunctionReturn(0);
1489 }
1490 #undef __FUNCT__
1491 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1492 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1493 {
1494   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1495   PetscErrorCode ierr;
1496 
1497   PetscFunctionBegin;
1498   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatEqual_MPIAIJ"
1504 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1505 {
1506   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1507   Mat        a,b,c,d;
1508   PetscTruth flg;
1509   PetscErrorCode ierr;
1510 
1511   PetscFunctionBegin;
1512   a = matA->A; b = matA->B;
1513   c = matB->A; d = matB->B;
1514 
1515   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1516   if (flg == PETSC_TRUE) {
1517     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1518   }
1519   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1520   PetscFunctionReturn(0);
1521 }
1522 
1523 #undef __FUNCT__
1524 #define __FUNCT__ "MatCopy_MPIAIJ"
1525 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1526 {
1527   PetscErrorCode ierr;
1528   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1529   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1530 
1531   PetscFunctionBegin;
1532   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1533   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1534     /* because of the column compression in the off-processor part of the matrix a->B,
1535        the number of columns in a->B and b->B may be different, hence we cannot call
1536        MatCopy() directly on the two parts. If need be, a copy more efficient than
1537        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1538        then copying the submatrices */
1539     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1540   } else {
1541     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1542     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1543   }
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1549 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1550 {
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #include "petscblaslapack.h"
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatAXPY_MPIAIJ"
1561 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1562 {
1563   PetscErrorCode ierr;
1564   int            i;
1565   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1566   PetscBLASInt   bnz,one=1;
1567   Mat_SeqAIJ     *x,*y;
1568 
1569   PetscFunctionBegin;
1570   if (str == SAME_NONZERO_PATTERN) {
1571     x = (Mat_SeqAIJ *)xx->A->data;
1572     y = (Mat_SeqAIJ *)yy->A->data;
1573     bnz = (PetscBLASInt)x->nz;
1574     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1575     x = (Mat_SeqAIJ *)xx->B->data;
1576     y = (Mat_SeqAIJ *)yy->B->data;
1577     bnz = (PetscBLASInt)x->nz;
1578     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1579   } else if (str == SUBSET_NONZERO_PATTERN) {
1580     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1581 
1582     x = (Mat_SeqAIJ *)xx->B->data;
1583     y = (Mat_SeqAIJ *)yy->B->data;
1584     if (y->xtoy && y->XtoY != xx->B) {
1585       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1586       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1587     }
1588     if (!y->xtoy) { /* get xtoy */
1589       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1590       y->XtoY = xx->B;
1591     }
1592     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1593   } else {
1594     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1595   }
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 /* -------------------------------------------------------------------*/
1600 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1601        MatGetRow_MPIAIJ,
1602        MatRestoreRow_MPIAIJ,
1603        MatMult_MPIAIJ,
1604 /* 4*/ MatMultAdd_MPIAIJ,
1605        MatMultTranspose_MPIAIJ,
1606        MatMultTransposeAdd_MPIAIJ,
1607        0,
1608        0,
1609        0,
1610 /*10*/ 0,
1611        0,
1612        0,
1613        MatRelax_MPIAIJ,
1614        MatTranspose_MPIAIJ,
1615 /*15*/ MatGetInfo_MPIAIJ,
1616        MatEqual_MPIAIJ,
1617        MatGetDiagonal_MPIAIJ,
1618        MatDiagonalScale_MPIAIJ,
1619        MatNorm_MPIAIJ,
1620 /*20*/ MatAssemblyBegin_MPIAIJ,
1621        MatAssemblyEnd_MPIAIJ,
1622        0,
1623        MatSetOption_MPIAIJ,
1624        MatZeroEntries_MPIAIJ,
1625 /*25*/ MatZeroRows_MPIAIJ,
1626 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1627        MatLUFactorSymbolic_MPIAIJ_TFS,
1628 #else
1629        0,
1630 #endif
1631        0,
1632        0,
1633        0,
1634 /*30*/ MatSetUpPreallocation_MPIAIJ,
1635        0,
1636        0,
1637        0,
1638        0,
1639 /*35*/ MatDuplicate_MPIAIJ,
1640        0,
1641        0,
1642        0,
1643        0,
1644 /*40*/ MatAXPY_MPIAIJ,
1645        MatGetSubMatrices_MPIAIJ,
1646        MatIncreaseOverlap_MPIAIJ,
1647        MatGetValues_MPIAIJ,
1648        MatCopy_MPIAIJ,
1649 /*45*/ MatPrintHelp_MPIAIJ,
1650        MatScale_MPIAIJ,
1651        0,
1652        0,
1653        0,
1654 /*50*/ MatGetBlockSize_MPIAIJ,
1655        0,
1656        0,
1657        0,
1658        0,
1659 /*55*/ MatFDColoringCreate_MPIAIJ,
1660        0,
1661        MatSetUnfactored_MPIAIJ,
1662        0,
1663        0,
1664 /*60*/ MatGetSubMatrix_MPIAIJ,
1665        MatDestroy_MPIAIJ,
1666        MatView_MPIAIJ,
1667        MatGetPetscMaps_Petsc,
1668        0,
1669 /*65*/ 0,
1670        0,
1671        0,
1672        0,
1673        0,
1674 /*70*/ 0,
1675        0,
1676        MatSetColoring_MPIAIJ,
1677        MatSetValuesAdic_MPIAIJ,
1678        MatSetValuesAdifor_MPIAIJ,
1679 /*75*/ 0,
1680        0,
1681        0,
1682        0,
1683        0,
1684 /*80*/ 0,
1685        0,
1686        0,
1687        0,
1688 /*84*/ MatLoad_MPIAIJ,
1689        0,
1690        0,
1691        0,
1692        0,
1693        0,
1694 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1695        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1696        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1697        MatPtAP_MPIAIJ_MPIAIJ,
1698        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1699 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1700        0,
1701        0,
1702        0};
1703 
1704 /* ----------------------------------------------------------------------------------------*/
1705 
1706 EXTERN_C_BEGIN
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1709 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1710 {
1711   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1712   PetscErrorCode ierr;
1713 
1714   PetscFunctionBegin;
1715   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1716   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1717   PetscFunctionReturn(0);
1718 }
1719 EXTERN_C_END
1720 
1721 EXTERN_C_BEGIN
1722 #undef __FUNCT__
1723 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1724 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1725 {
1726   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1727   PetscErrorCode ierr;
1728 
1729   PetscFunctionBegin;
1730   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1731   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1732   PetscFunctionReturn(0);
1733 }
1734 EXTERN_C_END
1735 
1736 #include "petscpc.h"
1737 EXTERN_C_BEGIN
1738 #undef __FUNCT__
1739 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1740 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1741 {
1742   Mat_MPIAIJ   *b;
1743   PetscErrorCode ierr;
1744   int          i;
1745 
1746   PetscFunctionBegin;
1747   B->preallocated = PETSC_TRUE;
1748   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1749   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1750   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1751   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1752   if (d_nnz) {
1753     for (i=0; i<B->m; i++) {
1754       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1755     }
1756   }
1757   if (o_nnz) {
1758     for (i=0; i<B->m; i++) {
1759       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1760     }
1761   }
1762   b = (Mat_MPIAIJ*)B->data;
1763   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1764   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1765 
1766   PetscFunctionReturn(0);
1767 }
1768 EXTERN_C_END
1769 
1770 /*MC
1771    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
1772 
1773    Options Database Keys:
1774 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
1775 
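  A minimal creation sketch (error checking omitted; the preallocation values
  here are merely illustrative):
.vb
   Mat A;
   MatCreate(comm,m,n,M,N,&A);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
.ve
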
1776   Level: beginner
1777 
1778 .seealso: MatCreateMPIAIJ
1779 M*/
1780 
1781 EXTERN_C_BEGIN
1782 #undef __FUNCT__
1783 #define __FUNCT__ "MatCreate_MPIAIJ"
1784 PetscErrorCode MatCreate_MPIAIJ(Mat B)
1785 {
1786   Mat_MPIAIJ *b;
1787   PetscErrorCode ierr;
1788   int        i,size;
1789 
1790   PetscFunctionBegin;
1791   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1792 
1793   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1794   B->data         = (void*)b;
1795   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1796   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1797   B->factor       = 0;
1798   B->assembled    = PETSC_FALSE;
1799   B->mapping      = 0;
1800 
1801   B->insertmode      = NOT_SET_VALUES;
1802   b->size            = size;
1803   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1804 
1805   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1806   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1807 
1808   /* the information in the maps duplicates the information computed below, eventually
1809      we should remove the duplicate information that is not contained in the maps */
1810   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1811   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1812 
1813   /* build local table of row and column ownerships */
1814   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1815   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1816   b->cowners = b->rowners + b->size + 2;
1817   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1818   b->rowners[0] = 0;
1819   for (i=2; i<=b->size; i++) {
1820     b->rowners[i] += b->rowners[i-1];
1821   }
1822   b->rstart = b->rowners[b->rank];
1823   b->rend   = b->rowners[b->rank+1];
1824   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1825   b->cowners[0] = 0;
1826   for (i=2; i<=b->size; i++) {
1827     b->cowners[i] += b->cowners[i-1];
1828   }
1829   b->cstart = b->cowners[b->rank];
1830   b->cend   = b->cowners[b->rank+1];
1831 
1832   /* build cache (stash) for off-processor entries formed during assembly */
1833   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1834   b->donotstash  = PETSC_FALSE;
1835   b->colmap      = 0;
1836   b->garray      = 0;
1837   b->roworiented = PETSC_TRUE;
1838 
1839   /* stuff used for matrix vector multiply */
1840   b->lvec      = PETSC_NULL;
1841   b->Mvctx     = PETSC_NULL;
1842 
1843   /* stuff for MatGetRow() */
1844   b->rowindices   = 0;
1845   b->rowvalues    = 0;
1846   b->getrowactive = PETSC_FALSE;
1847 
1848   /* Explicitly create 2 MATSEQAIJ matrices. */
1849   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
1850   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1851   PetscLogObjectParent(B,b->A);
1852   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
1853   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1854   PetscLogObjectParent(B,b->B);
1855 
1856   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1857                                      "MatStoreValues_MPIAIJ",
1858                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1859   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1860                                      "MatRetrieveValues_MPIAIJ",
1861                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1862   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1863 				     "MatGetDiagonalBlock_MPIAIJ",
1864                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1865   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
1866 				     "MatIsTranspose_MPIAIJ",
1867 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
1868   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1869 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1870 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
1871   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1872 				     "MatDiagonalScaleLocal_MPIAIJ",
1873 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
1874   PetscFunctionReturn(0);
1875 }
1876 EXTERN_C_END
1877 
1878 /*MC
1879    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
1880 
1881    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
1882    and MATMPIAIJ otherwise.  As a result, for single process communicators,
1883   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
1884   for communicators controlling multiple processes.  It is recommended that you call both of
1885   the above preallocation routines for simplicity.
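
  For example, code intended to run on any number of processes can simply call
  both routines; the preallocation call that does not match the actual matrix
  type is ignored (a sketch, with illustrative preallocation values):
.vb
   MatCreate(comm,m,n,M,N,&A);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,5,PETSC_NULL);
   MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);
.ve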
1886 
1887    Options Database Keys:
1888 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
1889 
1890   Level: beginner
1891 
1892 .seealso: MatCreateMPIAIJ,MATSEQAIJ,MATMPIAIJ
1893 M*/
1894 
1895 EXTERN_C_BEGIN
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatCreate_AIJ"
1898 PetscErrorCode MatCreate_AIJ(Mat A)
1899 {
1900   PetscErrorCode ierr;
1901   int size;
1902 
1903   PetscFunctionBegin;
1904   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJ);CHKERRQ(ierr);
1905   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1906   if (size == 1) {
1907     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
1908   } else {
1909     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1910   }
1911   PetscFunctionReturn(0);
1912 }
1913 EXTERN_C_END
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1917 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1918 {
1919   Mat        mat;
1920   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1921   PetscErrorCode ierr;
1922 
1923   PetscFunctionBegin;
1924   *newmat       = 0;
1925   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1926   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1927   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1928   a    = (Mat_MPIAIJ*)mat->data;
1929 
1930   mat->factor       = matin->factor;
1931   mat->assembled    = PETSC_TRUE;
1932   mat->insertmode   = NOT_SET_VALUES;
1933   mat->preallocated = PETSC_TRUE;
1934 
1935   a->rstart       = oldmat->rstart;
1936   a->rend         = oldmat->rend;
1937   a->cstart       = oldmat->cstart;
1938   a->cend         = oldmat->cend;
1939   a->size         = oldmat->size;
1940   a->rank         = oldmat->rank;
1941   a->donotstash   = oldmat->donotstash;
1942   a->roworiented  = oldmat->roworiented;
1943   a->rowindices   = 0;
1944   a->rowvalues    = 0;
1945   a->getrowactive = PETSC_FALSE;
1946 
1947   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1948   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1949   if (oldmat->colmap) {
1950 #if defined (PETSC_USE_CTABLE)
1951     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1952 #else
1953     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1954     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1955     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1956 #endif
1957   } else a->colmap = 0;
1958   if (oldmat->garray) {
1959     int len;
1960     len  = oldmat->B->n;
1961     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1962     PetscLogObjectMemory(mat,len*sizeof(int));
1963     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1964   } else a->garray = 0;
1965 
1966   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1967   PetscLogObjectParent(mat,a->lvec);
1968   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1969   PetscLogObjectParent(mat,a->Mvctx);
1970   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1971   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1972   PetscLogObjectParent(mat,a->A);
1973   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1974   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1975   PetscLogObjectParent(mat,a->B);
1976   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1977   *newmat = mat;
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 #include "petscsys.h"
1982 
1983 #undef __FUNCT__
1984 #define __FUNCT__ "MatLoad_MPIAIJ"
1985 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1986 {
1987   Mat            A;
1988   PetscScalar    *vals,*svals;
1989   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1990   MPI_Status     status;
1991   PetscErrorCode ierr;
1992   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1993   int            i,nz,j,rstart,rend,fd;
1994   int            header[4],*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1995   int            *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1996   int            cend,cstart,n;
1997 
1998   PetscFunctionBegin;
1999   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2000   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2001   if (!rank) {
2002     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2003     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2004     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2005     if (header[3] < 0) {
2006       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
2007     }
2008   }
2009 
2010   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2011   M = header[1]; N = header[2];
2012   /* determine ownership of all rows */
2013   m = M/size + ((M % size) > rank);
2014   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2015   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2016   rowners[0] = 0;
2017   for (i=2; i<=size; i++) {
2018     rowners[i] += rowners[i-1];
2019   }
2020   rstart = rowners[rank];
2021   rend   = rowners[rank+1];
2022 
2023   /* distribute row lengths to all processors */
2024   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
2025   offlens = ourlens + (rend-rstart);
2026   if (!rank) {
2027     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
2028     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2029     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2030     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
2031     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2032     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2033   } else {
2034     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
2035   }
2036 
2037   if (!rank) {
2038     /* calculate the number of nonzeros on each processor */
2039     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2040     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2041     for (i=0; i<size; i++) {
2042       for (j=rowners[i]; j< rowners[i+1]; j++) {
2043         procsnz[i] += rowlengths[j];
2044       }
2045     }
2046     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2047 
2048     /* determine max buffer needed and allocate it */
2049     maxnz = 0;
2050     for (i=0; i<size; i++) {
2051       maxnz = PetscMax(maxnz,procsnz[i]);
2052     }
2053     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2054 
2055     /* read in my part of the matrix column indices  */
2056     nz   = procsnz[0];
2057     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
2058     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2059 
2060     /* read in everyone else's parts and ship them off */
2061     for (i=1; i<size; i++) {
2062       nz   = procsnz[i];
2063       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2064       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2065     }
2066     ierr = PetscFree(cols);CHKERRQ(ierr);
2067   } else {
2068     /* determine buffer space needed for message */
2069     nz = 0;
2070     for (i=0; i<m; i++) {
2071       nz += ourlens[i];
2072     }
2073     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
2074 
2075     /* receive message of column indices */
2076     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2077     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2078     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2079   }
2080 
2081   /* determine column ownership if matrix is not square */
2082   if (N != M) {
2083     n      = N/size + ((N % size) > rank);
2084     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2085     cstart = cend - n;
2086   } else {
2087     cstart = rstart;
2088     cend   = rend;
2089     n      = cend - cstart;
2090   }
2091 
2092   /* loop over local rows, determining the number of off-diagonal entries */
2093   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2094   jj = 0;
2095   for (i=0; i<m; i++) {
2096     for (j=0; j<ourlens[i]; j++) {
2097       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2098       jj++;
2099     }
2100   }
2101 
2102   /* create our matrix */
2103   for (i=0; i<m; i++) {
2104     ourlens[i] -= offlens[i];
2105   }
2106   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2107   ierr = MatSetType(A,type);CHKERRQ(ierr);
2108   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2109 
2110   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2111   for (i=0; i<m; i++) {
2112     ourlens[i] += offlens[i];
2113   }
2114 
2115   if (!rank) {
2116     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2117 
2118     /* read in my part of the matrix numerical values  */
2119     nz   = procsnz[0];
2120     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2121 
2122     /* insert into matrix */
2123     jj      = rstart;
2124     smycols = mycols;
2125     svals   = vals;
2126     for (i=0; i<m; i++) {
2127       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2128       smycols += ourlens[i];
2129       svals   += ourlens[i];
2130       jj++;
2131     }
2132 
2133     /* read in the other processors' parts and ship them out */
2134     for (i=1; i<size; i++) {
2135       nz   = procsnz[i];
2136       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2137       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2138     }
2139     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2140   } else {
2141     /* receive numeric values */
2142     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2143 
2144     /* receive message of values */
2145     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2146     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2147     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2148 
2149     /* insert into matrix */
2150     jj      = rstart;
2151     smycols = mycols;
2152     svals   = vals;
2153     for (i=0; i<m; i++) {
2154       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2155       smycols += ourlens[i];
2156       svals   += ourlens[i];
2157       jj++;
2158     }
2159   }
2160   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2161   ierr = PetscFree(vals);CHKERRQ(ierr);
2162   ierr = PetscFree(mycols);CHKERRQ(ierr);
2163   ierr = PetscFree(rowners);CHKERRQ(ierr);
2164 
2165   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2166   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2167   *newmat = A;
2168   PetscFunctionReturn(0);
2169 }
2170 
2171 #undef __FUNCT__
2172 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2173 /*
2174     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
2175   locally, and then the end result by concatenating the local matrices.
2176   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2177 */
2178 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2179 {
2180   PetscErrorCode ierr;
2181   PetscMPIInt    rank,size;
2182   int            i,m,n,rstart,row,rend,nz,*cwork,j;
2183   int            *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2184   Mat            *local,M,Mreuse;
2185   PetscScalar    *vwork,*aa;
2186   MPI_Comm       comm = mat->comm;
2187   Mat_SeqAIJ     *aij;
2188 
2189 
2190   PetscFunctionBegin;
2191   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2192   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2193 
2194   if (call ==  MAT_REUSE_MATRIX) {
2195     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2196     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2197     local = &Mreuse;
2198     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2199   } else {
2200     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2201     Mreuse = *local;
2202     ierr   = PetscFree(local);CHKERRQ(ierr);
2203   }
2204 
2205   /*
2206       m - number of local rows
2207       n - number of columns (same on all processors)
2208       rstart - first row in new global matrix generated
2209   */
2210   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2211   if (call == MAT_INITIAL_MATRIX) {
2212     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2213     ii  = aij->i;
2214     jj  = aij->j;
2215 
2216     /*
2217         Determine the number of non-zeros in the diagonal and off-diagonal
2218         portions of the matrix in order to do correct preallocation
2219     */
2220 
2221     /* first get start and end of "diagonal" columns */
2222     if (csize == PETSC_DECIDE) {
2223       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2224       if (mglobal == n) { /* square matrix */
2225         nlocal = m;
2226       } else {
2227         nlocal = n/size + ((n % size) > rank);
2228       }
2229     } else {
2230       nlocal = csize;
2231     }
2232     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2233     rstart = rend - nlocal;
2234     if (rank == size - 1 && rend != n) {
2235       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2236     }
2237 
2238     /* next, compute all the lengths */
2239     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2240     olens = dlens + m;
2241     for (i=0; i<m; i++) {
2242       jend = ii[i+1] - ii[i];
2243       olen = 0;
2244       dlen = 0;
2245       for (j=0; j<jend; j++) {
2246         if (*jj < rstart || *jj >= rend) olen++;
2247         else dlen++;
2248         jj++;
2249       }
2250       olens[i] = olen;
2251       dlens[i] = dlen;
2252     }
2253     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2254     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2255     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2256     ierr = PetscFree(dlens);CHKERRQ(ierr);
2257   } else {
2258     int ml,nl;
2259 
2260     M = *newmat;
2261     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2262     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2263     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2264     /*
2265          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2266        rather than the slower MatSetValues().
2267     */
2268     M->was_assembled = PETSC_TRUE;
2269     M->assembled     = PETSC_FALSE;
2270   }
2271   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2272   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2273   ii  = aij->i;
2274   jj  = aij->j;
2275   aa  = aij->a;
2276   for (i=0; i<m; i++) {
2277     row   = rstart + i;
2278     nz    = ii[i+1] - ii[i];
2279     cwork = jj;     jj += nz;
2280     vwork = aa;     aa += nz;
2281     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2282   }
2283 
2284   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2285   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2286   *newmat = M;
2287 
2288   /* save the submatrix used on this processor for the next request */
2289   if (call ==  MAT_INITIAL_MATRIX) {
2290     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2291     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2292   }
2293 
2294   PetscFunctionReturn(0);
2295 }
2296 
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2299 /*@C
2300    MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in
2301    AIJ format (the default parallel PETSc format).  For good matrix assembly performance
2302    the user should preallocate the matrix storage by setting the parameters
2303    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2304    performance can be increased by more than a factor of 50.
2305 
2306    Collective on MPI_Comm
2307 
2308    Input Parameters:
2309 +  A - the matrix
2310 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2311            (same value is used for all local rows)
2312 .  d_nnz - array containing the number of nonzeros in the various rows of the
2313            DIAGONAL portion of the local submatrix (possibly different for each row)
2314            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2315            The size of this array is equal to the number of local rows, i.e. 'm'.
2316            You must leave room for the diagonal entry even if it is zero.
2317 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2318            submatrix (same value is used for all local rows).
2319 -  o_nnz - array containing the number of nonzeros in the various rows of the
2320            OFF-DIAGONAL portion of the local submatrix (possibly different for
2321            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2322            structure. The size of this array is equal to the number
2323            of local rows, i.e. 'm'.
2324 
2325    The AIJ format (also called the Yale sparse matrix format or
2326    compressed row storage) is fully compatible with standard Fortran 77
2327    storage.  That is, the stored row and column indices can begin at
2328    either one (as in Fortran) or zero.  See the users manual for details.
2329 
2330    The user MUST specify either the local or global matrix dimensions
2331    (possibly both).
2332 
2333    The parallel matrix is partitioned such that the first m0 rows belong to
2334    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2335    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2336 
2337    The DIAGONAL portion of the local submatrix of a processor can be defined
2338    as the submatrix obtained by extracting the part corresponding
2339    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2340    first row that belongs to the processor and r2 is the last row belonging
2341    to this processor. This is a square mxm matrix. The remaining portion
2342    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2343 
2344    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2345 
2346    By default, this format uses inodes (identical nodes) when possible.
2347    We search for consecutive rows with the same nonzero structure, thereby
2348    reusing matrix information to achieve increased efficiency.
2349 
2350    Options Database Keys:
2351 +  -mat_aij_no_inode  - Do not use inodes
2352 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2353 -  -mat_aij_oneindex - Internally use indexing starting at 1
2354         rather than 0.  Note that when calling MatSetValues(),
2355         the user still MUST index entries starting at 0!
2356 
2357    Example usage:
2358 
2359    Consider the following 8x8 matrix with 34 non-zero values that is
2360    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2361    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2362    as follows:
2363 
2364 .vb
2365             1  2  0  |  0  3  0  |  0  4
2366     Proc0   0  5  6  |  7  0  0  |  8  0
2367             9  0 10  | 11  0  0  | 12  0
2368     -------------------------------------
2369            13  0 14  | 15 16 17  |  0  0
2370     Proc1   0 18  0  | 19 20 21  |  0  0
2371             0  0  0  | 22 23  0  | 24  0
2372     -------------------------------------
2373     Proc2  25 26 27  |  0  0 28  | 29  0
2374            30  0  0  | 31 32 33  |  0 34
2375 .ve
2376 
2377    This can be represented as a collection of submatrices as:
2378 
2379 .vb
2380       A B C
2381       D E F
2382       G H I
2383 .ve
2384 
2385    Here the submatrices A,B,C are owned by proc0, D,E,F are
2386    owned by proc1, and G,H,I are owned by proc2.
2387 
2388    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2389    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2390    The 'M','N' parameters are 8,8, and have the same values on all procs.
2391 
2392    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2393    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2394    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2395    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2396    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2397    matrix, and [DF] as another SeqAIJ matrix.
2398 
2399    When d_nz, o_nz parameters are specified, d_nz storage elements are
2400    allocated for every row of the local diagonal submatrix, and o_nz
2401    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2402    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2403    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2404    In this case, the values of d_nz,o_nz are:
2405 .vb
2406      proc0 : d_nz = 2, o_nz = 2
2407      proc1 : d_nz = 3, o_nz = 2
2408      proc2 : d_nz = 1, o_nz = 4
2409 .ve
2410    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2411    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2412    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2413    34 values.
2414 
2415    When d_nnz, o_nnz parameters are specified, the storage is specified
2416    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2417    In the above case the values for d_nnz,o_nnz are:
2418 .vb
2419      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2420      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2421      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2422 .ve
2423    Here the space allocated is the sum of all the above values, i.e. 34, and
2424    hence the preallocation is exact.
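
   For instance, with the d_nnz/o_nnz variant above, proc1 might call (a sketch;
   A is assumed to already exist with type MPIAIJ and 3 local rows):
.vb
   int d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve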
2425 
2426    Level: intermediate
2427 
2428 .keywords: matrix, aij, compressed row, sparse, parallel
2429 
2430 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2431 @*/
2432 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2433 {
2434   PetscErrorCode ierr,(*f)(Mat,int,const int[],int,const int[]);
2435 
2436   PetscFunctionBegin;
2437   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2438   if (f) {
2439     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2440   }
2441   PetscFunctionReturn(0);
2442 }
2443 
2444 #undef __FUNCT__
2445 #define __FUNCT__ "MatCreateMPIAIJ"
2446 /*@C
2447    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2448    (the default parallel PETSc format).  For good matrix assembly performance
2449    the user should preallocate the matrix storage by setting the parameters
2450    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2451    performance can be increased by more than a factor of 50.
2452 
2453    Collective on MPI_Comm
2454 
2455    Input Parameters:
2456 +  comm - MPI communicator
2457 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
2458            This value should be the same as the local size used in creating the
2459            y vector for the matrix-vector product y = Ax.
2460 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given).
2461        This value should be the same as the local size used in creating the
2462        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2463 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2464 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2465 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2466            (same value is used for all local rows)
2467 .  d_nnz - array containing the number of nonzeros in the various rows of the
2468            DIAGONAL portion of the local submatrix (possibly different for each row)
2469            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2470            The size of this array is equal to the number of local rows, i.e. 'm'.
2471            You must leave room for the diagonal entry even if it is zero.
2472 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2473            submatrix (same value is used for all local rows).
2474 -  o_nnz - array containing the number of nonzeros in the various rows of the
2475            OFF-DIAGONAL portion of the local submatrix (possibly different for
2476            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2477            structure. The size of this array is equal to the number
2478            of local rows, i.e. 'm'.
2479 
2480    Output Parameter:
2481 .  A - the matrix
2482 
2483    Notes:
2484    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2485    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2486    storage requirements for this matrix.
2487 
2488    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2489    processor then it must be used on all processors that share the object for
2490    that argument.
2491 
2492    The AIJ format (also called the Yale sparse matrix format or
2493    compressed row storage) is fully compatible with standard Fortran 77
2494    storage.  That is, the stored row and column indices can begin at
2495    either one (as in Fortran) or zero.  See the users manual for details.
2496 
2497    The user MUST specify either the local or global matrix dimensions
2498    (possibly both).
2499 
2500    The parallel matrix is partitioned such that the first m0 rows belong to
2501    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2502    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2503 
2504    The DIAGONAL portion of the local submatrix of a processor can be defined
2505    as the submatrix obtained by extracting the part corresponding
2506    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2507    first row that belongs to the processor and r2 is the last row belonging
2508    to this processor. This is a square mxm matrix. The remaining portion
2509    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2510 
2511    If d_nnz and o_nnz are specified, then d_nz and o_nz are ignored.
2512 
2513    When calling this routine with a single process communicator, a matrix of
2514    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2515    type of communicator, use the construction mechanism:
2516      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2517 
2518    By default, this format uses inodes (identical nodes) when possible.
2519    We search for consecutive rows with the same nonzero structure, thereby
2520    reusing matrix information to achieve increased efficiency.
2521 
2522    Options Database Keys:
2523 +  -mat_aij_no_inode  - Do not use inodes
2524 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2525 -  -mat_aij_oneindex - Internally use indexing starting at 1
2526         rather than 0.  Note that when calling MatSetValues(),
2527         the user still MUST index entries starting at 0!
2528 
2529 
2530    Example usage:
2531 
2532    Consider the following 8x8 matrix with 34 non-zero values that is
2533    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2534    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2535    as follows:
2536 
2537 .vb
2538             1  2  0  |  0  3  0  |  0  4
2539     Proc0   0  5  6  |  7  0  0  |  8  0
2540             9  0 10  | 11  0  0  | 12  0
2541     -------------------------------------
2542            13  0 14  | 15 16 17  |  0  0
2543     Proc1   0 18  0  | 19 20 21  |  0  0
2544             0  0  0  | 22 23  0  | 24  0
2545     -------------------------------------
2546     Proc2  25 26 27  |  0  0 28  | 29  0
2547            30  0  0  | 31 32 33  |  0 34
2548 .ve
2549 
2550    This can be represented as a collection of submatrices as:
2551 
2552 .vb
2553       A B C
2554       D E F
2555       G H I
2556 .ve
2557 
2558    Here the submatrices A,B,C are owned by proc0, D,E,F are
2559    owned by proc1, and G,H,I are owned by proc2.
2560 
2561    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2562    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2563    The 'M','N' parameters are 8,8, and have the same values on all procs.
2564 
2565    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2566    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2567    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2568    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2569    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2570    matrix, and [DF] as another SeqAIJ matrix.
2571 
2572    When d_nz, o_nz parameters are specified, d_nz storage elements are
2573    allocated for every row of the local diagonal submatrix, and o_nz
2574    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2575    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2576    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2577    In this case, the values of d_nz,o_nz are:
2578 .vb
2579      proc0 : d_nz = 2, o_nz = 2
2580      proc1 : d_nz = 3, o_nz = 2
2581      proc2 : d_nz = 1, o_nz = 4
2582 .ve
2583    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2584    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2585    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2586    34 values.
2587 
2588    When d_nnz, o_nnz parameters are specified, the storage is specified
2589    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2590    In the above case the values for d_nnz,o_nnz are:
2591 .vb
2592      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2593      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2594      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2595 .ve
2596    Here the space allocated is the sum of all the above values, i.e. 34, and
2597    hence the preallocation is exact.
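
   With the d_nnz/o_nnz variant above, the corresponding creation call on proc1
   might be sketched as follows (d_nz and o_nz are passed as 0, since the arrays
   take precedence):
.vb
   int d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
   MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve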
2598 
2599    Level: intermediate
2600 
2601 .keywords: matrix, aij, compressed row, sparse, parallel
2602 
2603 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2604 @*/
2605 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2606 {
2607   PetscErrorCode ierr;
2608   int size;
2609 
2610   PetscFunctionBegin;
2611   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2612   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2613   if (size > 1) {
2614     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2615     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2616   } else {
2617     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2618     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2619   }
2620   PetscFunctionReturn(0);
2621 }
2622 
2623 #undef __FUNCT__
2624 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2625 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2626 {
2627   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2628   PetscFunctionBegin;
2629   *Ad     = a->A;
2630   *Ao     = a->B;
2631   *colmap = a->garray;
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 #undef __FUNCT__
2636 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2637 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2638 {
2639   PetscErrorCode ierr;
2640   int        i;
2641   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2642 
2643   PetscFunctionBegin;
2644   if (coloring->ctype == IS_COLORING_LOCAL) {
2645     ISColoringValue *allcolors,*colors;
2646     ISColoring      ocoloring;
2647 
2648     /* set coloring for diagonal portion */
2649     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2650 
2651     /* set coloring for off-diagonal portion */
2652     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2653     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2654     for (i=0; i<a->B->n; i++) {
2655       colors[i] = allcolors[a->garray[i]];
2656     }
2657     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2658     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2659     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2660     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2661   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2662     ISColoringValue *colors;
2663     int             *larray;
2664     ISColoring      ocoloring;
2665 
2666     /* set coloring for diagonal portion */
2667     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2668     for (i=0; i<a->A->n; i++) {
2669       larray[i] = i + a->cstart;
2670     }
2671     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2672     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2673     for (i=0; i<a->A->n; i++) {
2674       colors[i] = coloring->colors[larray[i]];
2675     }
2676     ierr = PetscFree(larray);CHKERRQ(ierr);
2677     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2678     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2679     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2680 
2681     /* set coloring for off-diagonal portion */
2682     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2683     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2684     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2685     for (i=0; i<a->B->n; i++) {
2686       colors[i] = coloring->colors[larray[i]];
2687     }
2688     ierr = PetscFree(larray);CHKERRQ(ierr);
2689     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2690     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2691     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2692   } else {
2693     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2694   }
2695 
2696   PetscFunctionReturn(0);
2697 }
2698 
2699 #undef __FUNCT__
2700 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2701 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2702 {
2703   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2704   PetscErrorCode ierr;
2705 
2706   PetscFunctionBegin;
2707   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2708   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2709   PetscFunctionReturn(0);
2710 }
2711 
2712 #undef __FUNCT__
2713 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2714 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2715 {
2716   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2717   PetscErrorCode ierr;
2718 
2719   PetscFunctionBegin;
2720   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2721   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2722   PetscFunctionReturn(0);
2723 }
2724 
2725 #undef __FUNCT__
2726 #define __FUNCT__ "MatMerge"
2727 /*@C
2728       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2729                  matrices from each processor
2730 
2731     Collective on MPI_Comm
2732 
2733    Input Parameters:
2734 +    comm - the communicator the parallel matrix will live on
2735 .    inmat - the input sequential matrix (one per process)
2736 .    n - number of local columns (or PETSC_DECIDE)
2737 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2738 
2739    Output Parameter:
2740 .    outmat - the parallel matrix generated
2741 
2742     Level: advanced
2743 
2744    Notes: The number of columns of the matrix on EACH processor MUST be the same.
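
   A usage sketch, in which each process contributes its own sequential piece;
   note that inmat is destroyed by this routine once its rows have been copied
   into outmat:
.vb
   Mat seqpiece,merged;
   /* ... assemble seqpiece as a SeqAIJ matrix on PETSC_COMM_SELF ... */
   MatMerge(comm,seqpiece,PETSC_DECIDE,MAT_INITIAL_MATRIX,&merged);
.ve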
2745 
2746 @*/
2747 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2748 {
2749   PetscErrorCode ierr;
2750   int               m,N,i,rstart,nnz,I,*dnz,*onz;
2751   const int         *indx;
2752   const PetscScalar *values;
2753   PetscMap          columnmap,rowmap;
2754 
2755   PetscFunctionBegin;
2756 
2757   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2758   if (scall == MAT_INITIAL_MATRIX){
2759     /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
2760     if (n == PETSC_DECIDE){
2761       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2762       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2763       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2764       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2765       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2766     }
2767 
2768     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2769     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2770     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2771     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2772     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2773 
2774     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2775     for (i=0;i<m;i++) {
2776       ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2777       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2778       ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2779     }
2780     /* This routine will ONLY return MPIAIJ type matrix */
2781     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2782     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2783     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2784     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2785 
2786   } else if (scall == MAT_REUSE_MATRIX){
2787     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2788   } else {
2789     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2790   }
2791 
2792   for (i=0;i<m;i++) {
2793     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2794     I    = i + rstart;
2795     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2796     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2797   }
2798   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2799   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2800   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2801 
2802   PetscFunctionReturn(0);
2803 }
2804 
2805 #undef __FUNCT__
2806 #define __FUNCT__ "MatFileSplit"
2807 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2808 {
2809   PetscErrorCode    ierr;
2810   PetscMPIInt       rank;
2811   int               m,N,i,rstart,nnz;
2812   size_t            len;
2813   const int         *indx;
2814   PetscViewer       out;
2815   char              *name;
2816   Mat               B;
2817   const PetscScalar *values;
2818 
2819   PetscFunctionBegin;
2820 
2821   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2822   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2823   /* Should this be the type of the diagonal block of A? */
2824   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2825   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2826   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2827   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2828   for (i=0;i<m;i++) {
2829     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2830     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2831     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2832   }
2833   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2834   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2835 
2836   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2837   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2838   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for ".<rank>" and the terminating null for any rank */
2839   sprintf(name,"%s.%d",outfile,rank);
2840   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2841   ierr = PetscFree(name);CHKERRQ(ierr);
2842   ierr = MatView(B,out);CHKERRQ(ierr);
2843   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2844   ierr = MatDestroy(B);CHKERRQ(ierr);
2845   PetscFunctionReturn(0);
2846 }
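
/*
   A hedged usage sketch for MatFileSplit() (not part of the original source):
   each process writes its local rows of A, as a SeqAIJ matrix, to its own
   binary file, named by appending the rank to outfile.

     ierr = MatFileSplit(A,"matrix.dat");CHKERRQ(ierr);
     /* a 2-process run produces matrix.dat.0 and matrix.dat.1, each of which
        can later be loaded as a sequential matrix on PETSC_COMM_SELF */
*/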
2847 
2848 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2849 #undef __FUNCT__
2850 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2851 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2852 {
2853   PetscErrorCode       ierr;
2854   Mat_Merge_SeqsToMPI  *merge;
2855   PetscObjectContainer container;
2856 
2857   PetscFunctionBegin;
2858   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2859   if (container) {
2860     ierr  = PetscObjectContainerGetPointer(container,(void *)&merge);CHKERRQ(ierr);
2861     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2862     ierr = PetscFree(merge->len_sra);CHKERRQ(ierr);
2863     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2864     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2865     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2866     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2867     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2868     ierr = MatDestroy(merge->C_seq);CHKERRQ(ierr);
2869 
2870     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2871     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2872     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container was found */
2873   }
2874 
2875   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2876   PetscFunctionReturn(0);
2877 }
2878 
2879 #include "src/mat/utils/freespace.h"
2880 #include "petscbt.h"
2881 #undef __FUNCT__
2882 #define __FUNCT__ "MatMerge_SeqsToMPI"
2883 /*@C
2884       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the
2885                  sequential matrices from each processor
2886 
2887     Collective on MPI_Comm
2888 
2889    Input Parameters:
2890 +    comm - the communicator the parallel matrix will live on
2891 .    seqmat - the input sequential matrix on each processor
2892 .    m - number of local rows (or PETSC_DECIDE)
2893 .    n - number of local columns (or PETSC_DECIDE)
2894 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2895 
2896    Output Parameter:
2897 .    mpimat - the parallel matrix generated
2898 
2899     Level: advanced
2900 
2901    Notes:
2902      The dimensions of the sequential matrix on each processor MUST be the same.
2903      The input seqmat is stored in the container "MatMergeSeqsToMPI" attached to
2904      mpimat and is destroyed when mpimat is destroyed; call PetscObjectQuery() to access it.
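
   Example usage (a sketch under the assumptions above):
.vb
      Mat seqmat,mpimat;
      /* ... each process assembles its M-by-N SeqAIJ contribution seqmat ... */
      ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      /* seqmat is now owned by mpimat; do not destroy it directly */
.ve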
2905 @*/
2906 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
2907 {
2908   PetscErrorCode    ierr;
2909   Mat               B_seq,B_mpi;
2910   Mat_SeqAIJ        *a=(Mat_SeqAIJ*)seqmat->data;
2911   PetscMPIInt       size,rank;
2912   int               M=seqmat->m,N=seqmat->n,i,j,*owners,*ai=a->i,*aj=a->j;
2913   int               tag,taga,len,len_a = 0,*len_s,*len_sa,proc,*len_r,*len_ra;
2914   int               tagi,tagj,*len_si,*len_ri,*len_rj,*id_r,**buf_ri,**buf_rj;
2915   int               **ijbuf_r,*ijbuf_s,*ijbuf,*nnz_ptr,k,anzi,*bj_i,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0,nextaj;
2916   MPI_Request       *s_waits,*r_waits,*si_waits,*sj_waits,*ri_waits,*rj_waits,*s_waitsa,*r_waitsa;
2917   MPI_Status        *status;
2918   MatScalar         *ba,*aa=a->a,**abuf_r,*abuf_i,*ba_i;
2919   FreeSpaceList     free_space=PETSC_NULL,current_space=PETSC_NULL;
2920   PetscBT           lnkbt;
2921   Mat_Merge_SeqsToMPI  *merge;
2922   PetscObjectContainer container;
2923 
2924 
2925   PetscFunctionBegin;
2926   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2927   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2929   if (scall == MAT_INITIAL_MATRIX){
2930     ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
2931   } else if (scall == MAT_REUSE_MATRIX){
2932     B_mpi = *mpimat;
2933     ierr = PetscObjectQuery((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2934     if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix was not created by MatMerge_SeqsToMPI()");
2935     ierr = PetscObjectContainerGetPointer(container,(void *)&merge);CHKERRQ(ierr);
2937     bi      = merge->bi;
2938     bj      = merge->bj;
2939     buf_ri  = merge->buf_ri;
2940     buf_rj  = merge->buf_rj;
2941   } else {
2942     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",scall);
2943   }
2944   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2945 
2946   /* determine the number of messages to send, their lengths */
2947   /*---------------------------------------------------------*/
2948   if (scall == MAT_INITIAL_MATRIX){
2949     ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
2950     if (m == PETSC_DECIDE) {
2951       ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
2952     } else {
2953       ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
2954     }
2955     ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
2956     ierr = PetscMalloc(2*size*sizeof(int),&len_s);CHKERRQ(ierr);
2957     len_si = len_s + size;
2958     ierr = PetscMalloc(2*size*sizeof(int),&merge->len_sra);CHKERRQ(ierr);
2959   }
2960   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);}
2961   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2962 
2963   len_sa  = merge->len_sra;
2964   len_ra = merge->len_sra + size;
2965 
2966   if (scall == MAT_INITIAL_MATRIX){
2967     merge->nsend = 0;
2968     len = len_a = 0;
2969     for (proc=0; proc<size; proc++){
2970       len_si[proc] = 0;
2971       len_s[proc] = len_sa[proc] = 0;
2972       if (proc == rank) continue;
2973       len_si[proc] = owners[proc+1] - owners[proc] + 1;
2974       len_sa[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* rows sent to [proc] */
2975 
2976       if (len_sa[proc]) {
2977         merge->nsend++;
2978         len_s[proc] = len_sa[proc] + owners[proc+1] - owners[proc];
2979         if (len < len_s[proc]) len = len_s[proc];
2980         if (len_a < len_sa[proc]) len_a = len_sa[proc];
2984       }
2985     }
2986 
2987     /* determine the number and length of messages to receive */
2988     /*--------------------------------------------------------*/
2989     ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
2990     ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_s,&merge->id_r,&len_r);CHKERRQ(ierr);
2991     ierr = PetscFree(merge->id_r);CHKERRQ(ierr); /* discard; id_r is regathered below together with the j-lengths */
2992     ierr = PetscGatherMessageLengths(comm,merge->nsend,merge->nrecv,len_sa,&merge->id_r,&len_rj);CHKERRQ(ierr);
2993 
2994     ierr = PetscMalloc(size*sizeof(int),&len_ri);CHKERRQ(ierr);
2995     ierr = PetscMemzero(len_ri,size*sizeof(int));CHKERRQ(ierr);
2996 
2997     for (i=0; i<merge->nrecv; i++){
2999       len_ri[i] = owners[rank+1] - owners[rank] + 1;
3002     }
3003 
3004     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %d, nrecv: %d\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3005     for (i=0; i<merge->nrecv; i++){
3006       ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   expects recv len_r=%d from [%d]\n",len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3007     }
3008 
3009     /* post the Irecvs corresponding to these messages */
3010     /*-------------------------------------------------*/
3011     /* get new tag to keep the communication clean */
3012     ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tag);CHKERRQ(ierr);
3013     ierr = PetscPostIrecvInt(comm,tag,merge->nrecv,merge->id_r,len_r,&ijbuf_r,&r_waits);CHKERRQ(ierr);
3014 
3015     ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3016     ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,len_rj,&buf_rj,&rj_waits);CHKERRQ(ierr);
3017 
3018     /* post the sends of ij-structure */
3019     /*--------------------------------*/
3020     ierr = PetscMalloc((3*merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3021     si_waits = s_waits + merge->nsend;
3022     sj_waits = si_waits + merge->nsend;
3023     ierr = PetscMalloc((merge->nsend*(len+1)+1)*sizeof(int),&ijbuf_s);CHKERRQ(ierr);
3024     k = 0;
3025     for (proc=0; proc<size; proc++){
3026       if (!len_s[proc]) continue;
3027       /* form outgoing ij data for [proc]; each pending MPI_Isend() keeps its own slice of ijbuf_s */
3028       ijbuf   = ijbuf_s + k*(len+1);
3029       nnz_ptr = ijbuf + owners[proc+1] - owners[proc];
3030       for (i=owners[proc]; i<owners[proc+1]; i++){ /* rows sent to [proc] */
3031         anzi = ai[i+1] - ai[i];
3032         if (i==owners[proc]){
3033           ijbuf[i-owners[proc]] = anzi;
3034         } else {
3035           ijbuf[i-owners[proc]] = ijbuf[i-owners[proc]-1] + anzi;
3036         }
3037         for (j=0; j<anzi; j++){ *nnz_ptr = *(aj+ai[i]+j); nnz_ptr++; }
3038       }
3039       ierr = MPI_Isend(ijbuf,len_s[proc],MPI_INT,proc,tag,comm,s_waits+k);CHKERRQ(ierr);
3041       i = owners[proc];
3042       ierr = MPI_Isend(aj+ai[i],len_sa[proc],MPI_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3043       k++;
3044     }
3045 
3046     /* receives and sends of ij-structure are complete, deallocate memories */
3047     /*----------------------------------------------------------------------*/
3048     ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
3049     ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
3050 
3051     ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3052     ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3053 
3054     /* send and recv i-structure */
3055     /*---------------------------*/
3056     ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3057     ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3058 
3059     k = 0;
3060     for (proc=0; proc<size; proc++){
3061       if (!len_s[proc]) continue;
3062       i    = owners[proc];
3064       ierr = MPI_Isend(ai+i,len_si[proc],MPI_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3065       k++;
3066     }
3067     ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3069     ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3070 
3071     ierr = PetscFree(len_ri);CHKERRQ(ierr);
3072     ierr = PetscFree(len_rj);CHKERRQ(ierr);
3073     ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3074     ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3075 
3076     ierr = PetscFree(ijbuf_s);CHKERRQ(ierr);
3077     ierr = PetscFree(s_waits);CHKERRQ(ierr);
3078     ierr = PetscFree(r_waits);CHKERRQ(ierr);
3079 
3080     /* create seq matrix B_seq in each processor */
3081     /*-------------------------------------------*/
3082     ierr = PetscFree(len_s);CHKERRQ(ierr);
3083     /* allocate bi array and free space for accumulating nonzero column info */
3084     ierr = PetscMalloc((m+1)*sizeof(int),&bi);CHKERRQ(ierr);
3085     bi[0] = 0;
3086 
3087     /* create and initialize a linked list */
3088     nlnk = N+1;
3089     ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3090 
3091     /* initial FreeSpace size is (nproc)*(num of local nnz(seqmat)) */
3092     len = 0;
3093     for (i=owners[rank]; i<owners[rank+1]; i++) len += ai[i+1] - ai[i];
3094     ierr = GetMoreSpace((int)(size*len+1),&free_space);CHKERRQ(ierr);
3095     current_space = free_space;
3096 
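    /*
       Symbolic phase: for each local row of the result, the column indices of
       this process's own seqmat row are merged with the matching rows received
       from the other processes. PetscLLAdd() inserts a sorted index list into
       the linked list (duplicates are absorbed; nlnk reports how many entries
       were new) and PetscLLClean() drains the list into the free-space buffer,
       which is grown on demand.
    */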
3097     /* determine symbolic info for each row of B_seq */
3098     for (i=0;i<m;i++) {
3099       bnzi   = 0;
3100       /* add local non-zero cols of this proc's seqmat into lnk */
3101       arow   = owners[rank] + i;
3102       anzi   = ai[arow+1] - ai[arow];
3103       aj     = a->j + ai[arow];
3104       ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3105       bnzi += nlnk;
3106       /* add received col data into lnk */
3107       for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3108         /* i-th row */
3109         anzi = *(buf_ri[k]+i+1) - *(buf_ri[k]+i);
3110         aj   = buf_rj[k] + (*(buf_ri[k]+i) - *(buf_ri[k]));
3120         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3121         bnzi += nlnk;
3122       }
3123 
3124       /* if free space is not available, make more free space */
3125       if (current_space->local_remaining<bnzi) {
3126         ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3127         nspacedouble++;
3128       }
3129       /* copy data into free space, then initialize lnk */
3130       ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3131       current_space->array           += bnzi;
3132       current_space->local_used      += bnzi;
3133       current_space->local_remaining -= bnzi;
3134 
3135       bi[i+1] = bi[i] + bnzi;
3136     }
3137     ierr = PetscMalloc((bi[m]+1)*sizeof(int),&bj);CHKERRQ(ierr);
3138     ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3139 
3140     ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3141 
3142     /* allocate space for ba */
3143     ierr = PetscMalloc((bi[m]+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr);
3144     ierr = PetscMemzero(ba,(bi[m]+1)*sizeof(MatScalar));CHKERRQ(ierr);
3145 
3146     /* put together B_seq */
3147     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,bi,bj,ba,&B_seq);CHKERRQ(ierr);
3148 
3149     /* create symbolic parallel matrix B_mpi by concatenating B_seq */
3150     /*--------------------------------------------------------------*/
3151     ierr = MatMerge(comm,B_seq,n,scall,&B_mpi);CHKERRQ(ierr);
3152     ierr = PetscFree(ba);CHKERRQ(ierr); /* safe only after MatMerge() has read B_seq; bi and bj are saved for reuse */
3153     B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3154     merge->bi            = bi;
3155     merge->bj            = bj;
3156     merge->buf_ri        = buf_ri;
3157     merge->buf_rj        = buf_rj;
3158     merge->C_seq         = seqmat;
3159 
3160     /* attach the supporting struct to B_mpi for reuse */
3161     ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3162     ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3163     ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3164     *mpimat = B_mpi;
3165   } /* if (scall == MAT_INITIAL_MATRIX) */
3166 
3167   /* send and recv matrix values */
3168   /*-----------------------------*/
3169   if (scall == MAT_INITIAL_MATRIX){
3170     for (i=0; i<merge->nrecv; i++) len_ra[i] = len_r[i]-m; /* length of seqmat->a to be received from a proc */
3171     ierr = PetscFree(len_r);CHKERRQ(ierr);
3172   }
3173   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
3174   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,len_ra,&abuf_r,&r_waitsa);CHKERRQ(ierr);
3175 
3176   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waitsa);CHKERRQ(ierr);
3177   k = 0;
3178   for (proc=0; proc<size; proc++){
3179     if (!len_sa[proc]) continue;
3180     i = owners[proc];
3181     ierr = MPI_Isend(aa+ai[i],len_sa[proc],MPIU_MATSCALAR,proc,taga,comm,s_waitsa+k);CHKERRQ(ierr);
3182     k++;
3183   }
3184 
3185   ierr = MPI_Waitall(merge->nrecv,r_waitsa,status);CHKERRQ(ierr);
3186   ierr = MPI_Waitall(merge->nsend,s_waitsa,status);CHKERRQ(ierr);
3187   ierr = PetscFree(status);CHKERRQ(ierr);
3188 
3189   ierr = PetscFree(s_waitsa);CHKERRQ(ierr);
3190   ierr = PetscFree(r_waitsa);CHKERRQ(ierr);
3191 
3192   /* insert mat values of B_mpi */
3193   /*----------------------------*/
3194   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3195 
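  /*
     Numeric phase: for row i, bj_i holds the merged, sorted column indices of
     the result, a superset of each contributing sorted index list aj, so a
     single forward scan lines them up: j walks bj_i while nextaj advances only
     on a column match, accumulating the corresponding value into ba_i.
  */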
3196   /* set values of ba */
3197   for (i=0; i<m; i++) {
3198     arow = owners[rank] + i;
3199     bj_i = bj+bi[i];  /* col indices of the i-th row of B_seq */
3200     bnzi = bi[i+1] - bi[i];
3201     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3202 
3203     /* add local non-zero vals of this proc's seqmat into ba */
3204     anzi = ai[arow+1] - ai[arow];
3205     aj   = a->j + ai[arow];
3206     aa   = a->a + ai[arow];
3207     nextaj = 0;
3208     for (j=0; nextaj<anzi; j++){
3209       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3210         ba_i[j] += aa[nextaj++];
3211       }
3212     }
3213 
3214     /* add received vals into ba */
3215     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3216       /* i-th row */
3217       anzi = *(buf_ri[k]+i+1) - *(buf_ri[k]+i);
3218       aj   = buf_rj[k] + (*(buf_ri[k]+i) - *(buf_ri[k]));
3219       aa   = abuf_r[k] + (*(buf_ri[k]+i) - *(buf_ri[k]));
3231       nextaj = 0;
3232       for (j=0; nextaj<anzi; j++){
3233         if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3234           ba_i[j] += aa[nextaj++];
3235         }
3236       }
3237     }
3238 
3239     ierr = MatSetValues(B_mpi,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3240   }
3241   ierr = MatAssemblyBegin(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3242   ierr = MatAssemblyEnd(B_mpi,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3243 
3244   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3245   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3246   if (scall == MAT_INITIAL_MATRIX){
3247     ierr = PetscFree(ijbuf_r);CHKERRQ(ierr); /* only needed during the initial ij exchange */
3248   }
3249   PetscFunctionReturn(0);
3250 }
3251 
3252 #undef __FUNCT__
3253 #define __FUNCT__ "MatGetLocalMat"
3254 /*@C
3255      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of a parallel matrix
3256 
3257     Collective on Mat
3258 
3259    Input Parameters:
3260 +    A - the matrix
3261 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3262 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3263 
3264    Output Parameter:
3265 .    A_loc - the local sequential matrix generated
3266 
3267     Level: developer
3268 
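   Example usage (a sketch; with row and col PETSC_NULL, the local row range
   and all locally referenced columns are taken):
.vb
      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      /* ... A changes its values but not its nonzero structure ... */
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve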
3269 @*/
3270 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3271 {
3272   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3273   PetscErrorCode    ierr;
3274   int               *idx,i,start,end,ncols,nzA,nzB,*cmap,imark;
3275   IS                isrowa,iscola;
3276   Mat               *aloc;
3277 
3278   PetscFunctionBegin;
3279   if (!row){
3280     start = a->rstart; end = a->rend;
3281     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3282   } else {
3283     isrowa = *row;
3284   }
3285   if (!col){
3286     start = a->cstart;
3287     cmap  = a->garray;
3288     nzA   = a->A->n;
3289     nzB   = a->B->n;
3290     ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
3291     ncols = 0;
3292     for (i=0; i<nzB; i++) {
3293       if (cmap[i] < start) idx[ncols++] = cmap[i];
3294       else break;
3295     }
3296     imark = i;
3297     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3298     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3299     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3300     ierr = PetscFree(idx);CHKERRQ(ierr);
3301   } else {
3302     iscola = *col;
3303   }
3304   if (scall != MAT_INITIAL_MATRIX){
3305     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3306     aloc[0] = *A_loc;
3307   }
3308   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3309   *A_loc = aloc[0];
3310   ierr = PetscFree(aloc);CHKERRQ(ierr);
3311   if (!row){
3312     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3313   }
3314   if (!col){
3315     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3316   }
3317   PetscFunctionReturn(0);
3318 }
3319 
3320 #undef __FUNCT__
3321 #define __FUNCT__ "MatGetBrowsOfAcols"
3322 /*@C
3323      MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
3324 
3325     Collective on Mat
3326 
3327    Input Parameters:
3328 +    A,B - the matrices
3329 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3330 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3331 
3332    Output Parameter:
3333 +    rowb, colb - index sets of the rows and columns of B that were extracted (created here on MAT_INITIAL_MATRIX)
3334 .    brstart - index of the first row of B_seq that is taken from B's own local rows
3335 -    B_seq - the sequential matrix generated
3336 
3337     Level: developer
3338 
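   Example usage (a sketch; the rowb and colb created by the first call must be
   passed back in when reusing):
.vb
      IS       rowb,colb;
      Mat      B_seq;
      PetscInt brstart;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      /* ... B changes its values but not its nonzero structure ... */
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve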
3339 @*/
3340 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3341 {
3342   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3343   PetscErrorCode    ierr;
3344   int               *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3345   IS                isrowb,iscolb;
3346   Mat               *bseq;
3347 
3348   PetscFunctionBegin;
3349   if (a->cstart != b->rstart || a->cend != b->rend){
3350     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3351   }
3352 
3353   if (scall == MAT_INITIAL_MATRIX){
3354     start = a->cstart;
3355     cmap  = a->garray;
3356     nzA   = a->A->n;
3357     nzB   = a->B->n;
3358     ierr  = PetscMalloc((nzA+nzB)*sizeof(int), &idx);CHKERRQ(ierr);
3359     ncols = 0;
3360     for (i=0; i<nzB; i++) {
3361       if (cmap[i] < start) idx[ncols++] = cmap[i];
3362       else break;
3363     }
3364     imark = i;
3365     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3366     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3367     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3368     ierr = PetscFree(idx);CHKERRQ(ierr);
3369     *brstart = imark;
3370     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3371   } else {
3372     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3373     isrowb = *rowb; iscolb = *colb;
3374     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3375     bseq[0] = *B_seq;
3376   }
3377   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3378   *B_seq = bseq[0];
3379   ierr = PetscFree(bseq);CHKERRQ(ierr);
3380   if (!rowb){
3381     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3382   } else {
3383     *rowb = isrowb;
3384   }
3385   if (!colb){
3386     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3387   } else {
3388     *colb = iscolb;
3389   }
3390 
3391   PetscFunctionReturn(0);
3392 }
3393