/* petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 7a7894de0ae7ea508ad2b43522ac5a3ba7d8845d) */

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table cost; without it, it is not scalable (each
  processor has an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
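
/*
   Illustration (added commentary, not part of the original source): suppose
   this process's off-diagonal block B has local columns whose global
   indices are garray[] = {3,7,12}.  After CreateColmap_MPIAIJ_Private():

     with PETSC_USE_CTABLE:    the table maps key 4->1, 8->2, 13->3
                               (keys and values are stored 1-based)
     without PETSC_USE_CTABLE: colmap[3] = 1, colmap[7] = 2, colmap[12] = 3
                               (all other entries 0, meaning "not present")

   so global column g corresponds to local column colmap[g]-1 in B, and a
   result of -1 signals that g does not occur in the off-diagonal part.
*/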

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (PetscInt*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}
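
/*
   Note (added commentary): the macro above implements one insertion into the
   local diagonal block.  A binary search narrows the row to a window of at
   most 5 entries, a linear scan finds the exact slot, and if the row is full
   (nrow >= rmax) the entire a/j/i storage is reallocated as a single malloc
   with CHUNKSIZE = 15 extra slots spliced in at this row; all later row
   offsets in ai[] are shifted up by CHUNKSIZE.  The B macro below is
   identical except that it operates on the off-diagonal block.
*/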

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row]; \
    col1 = col - shift; \
 \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (PetscInt*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar    *ba = b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  PetscInt       nonew = a->nonew,shift=0;
  PetscScalar    *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
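
/*
   A minimal usage sketch (added commentary, not original source).  Callers
   never invoke MatSetValues_MPIAIJ() directly; it is reached through the
   MatSetValues() dispatch.  Entries whose row is owned locally go straight
   into the A (diagonal) or B (off-diagonal) block above; rows owned by
   another process are stashed and communicated during assembly.  With a
   hypothetical assembled MPIAIJ matrix mat:

       PetscScalar v = 1.0;
       PetscInt    i = 0, j = 5;   (global indices, hypothetical)
       ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/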

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
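
/*
   Added note: unlike MatSetValues(), which stashes contributions to remote
   rows, MatGetValues_MPIAIJ() can only return entries from locally owned
   rows (see the SETERRQ above).  Off-diagonal columns are located through
   the same colmap used for insertion, and columns absent from the local
   nonzero structure simply yield 0.0.
*/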

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT mode or ADD mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
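
/*
   Added note: the MPI_BOR reduction above is a cheap consistency check.
   Assuming the usual PETSc enum values INSERT_VALUES = 1 and ADD_VALUES = 2,
   a bitwise OR across all ranks yields 3 == (ADD_VALUES|INSERT_VALUES)
   exactly when some ranks inserted while others added, which is the mixed
   state that must be rejected.
*/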

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use     = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
      /* re-fetch b because aij->B was changed */
      b    = (Mat_SeqAIJ *)aij->B->data;
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }

  b->inode.use         = PETSC_FALSE;
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory; that would mean lots of mallocs and slow
     things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
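
/*
   Added summary of the communication pattern above: each process bins the
   global rows it was handed by owning process, the number and maximum
   length of the messages is established with PetscMaxSum(), the row
   indices then travel point-to-point, and each receiver converts the rows
   it owns to local numbering before zeroing them through the sequential
   MatZeroRows() on its diagonal and off-diagonal blocks.  The rectangular
   case with a requested diagonal falls back on MatSetValues() one row at a
   time, which is why it requires new nonzero insertions to be allowed.
*/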

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
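
/*
   Added note: this is the classic overlap of communication and computation
   for a distributed y = A*x.  With each row block split as [A_d | B_o]
   (diagonal and off-diagonal parts), every process computes

       y_local = A_d * x_local + B_o * x_ghost

   The scatter that fills the ghost values a->lvec is started first, the
   purely local product A_d*x is done while messages are in flight, and only
   then is the scatter completed and the B_o*lvec contribution added in.
*/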

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in yy until the next line, which is true for this implementation */
  /* but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
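
/*
   Added note: the transpose test above first compares the local diagonal
   blocks (cheap, no communication) and bails out early on failure; only
   then does it extract the complementary off-diagonal pieces with
   MatGetSubMatrices() -- Amat restricted to (Me,Notme) against Bmat
   restricted to (Notme,Me) -- and compares those to the given tolerance.
*/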

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this implementation */
  /* but is perhaps not always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
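
/*
   Added note on the binary layout produced above, read directly off the
   writes: a 4-entry header (MAT_FILE_COOKIE, M, N, global nonzero count),
   then all row lengths, then all global column indices, then all values,
   each section written rank by rank with rank 0 acting as the funnel to
   the file descriptor.
*/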

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      if (!a->inode.size) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
      if (a->inode.size) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",a->inode.node_count,a->inode.limit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
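
/*
   Added note: each "local" SOR variant above is a block Jacobi outer
   iteration with SOR applied within the diagonal block.  Per outer sweep
   the ghost values of x are gathered into mat->lvec and the right-hand
   side is updated as

       bb1 = bb - B * x_ghost

   (implemented as a multadd with lvec scaled by -1), after which the
   sequential relaxation runs on the diagonal block only.
*/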

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatNorm_MPIAIJ"
1318 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1319 {
1320   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1321   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1322   PetscErrorCode ierr;
1323   PetscInt       i,j,cstart = aij->cstart;
1324   PetscReal      sum = 0.0;
1325   PetscScalar    *v;
1326 
1327   PetscFunctionBegin;
1328   if (aij->size == 1) {
1329     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1330   } else {
1331     if (type == NORM_FROBENIUS) {
1332       v = amat->a;
1333       for (i=0; i<amat->nz; i++) {
1334 #if defined(PETSC_USE_COMPLEX)
1335         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1336 #else
1337         sum += (*v)*(*v); v++;
1338 #endif
1339       }
1340       v = bmat->a;
1341       for (i=0; i<bmat->nz; i++) {
1342 #if defined(PETSC_USE_COMPLEX)
1343         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1344 #else
1345         sum += (*v)*(*v); v++;
1346 #endif
1347       }
1348       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1349       *norm = sqrt(*norm);
1350     } else if (type == NORM_1) { /* max column norm */
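      /* accumulate the absolute column sums of the local rows into a
         length-N array, add the arrays across all processes with a
         sum-reduction, then take the largest entry */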
1351       PetscReal *tmp,*tmp2;
1352       PetscInt    *jj,*garray = aij->garray;
1353       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1354       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1355       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1356       *norm = 0.0;
1357       v = amat->a; jj = amat->j;
1358       for (j=0; j<amat->nz; j++) {
1359         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1360       }
1361       v = bmat->a; jj = bmat->j;
1362       for (j=0; j<bmat->nz; j++) {
1363         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1364       }
1365       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1366       for (j=0; j<mat->N; j++) {
1367         if (tmp2[j] > *norm) *norm = tmp2[j];
1368       }
1369       ierr = PetscFree(tmp);CHKERRQ(ierr);
1370       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1371     } else if (type == NORM_INFINITY) { /* max row norm */
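      /* rows are wholly local, so each process takes the maximum over its
         own row sums and only a max-reduction is needed at the end */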
1372       PetscReal ntemp = 0.0;
1373       for (j=0; j<aij->A->m; j++) {
1374         v = amat->a + amat->i[j];
1375         sum = 0.0;
1376         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1377           sum += PetscAbsScalar(*v); v++;
1378         }
1379         v = bmat->a + bmat->i[j];
1380         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1381           sum += PetscAbsScalar(*v); v++;
1382         }
1383         if (sum > ntemp) ntemp = sum;
1384       }
1385       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1386     } else {
1387       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1388     }
1389   }
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatTranspose_MPIAIJ"
1395 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1396 {
1397   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1398   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1399   PetscErrorCode ierr;
1400   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1401   Mat            B;
1402   PetscScalar    *array;
1403 
1404   PetscFunctionBegin;
1405   if (!matout && M != N) {
1406     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1407   }
1408 
1409   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1410   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1411   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1412 
1413   /* copy over the A part */
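  /* temporarily shift the diagonal block's column indices to global
     numbering, insert each local row as a column of B (note the swapped
     row/column arguments to MatSetValues()), then shift the indices back */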
1414   Aloc = (Mat_SeqAIJ*)a->A->data;
1415   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1416   row = a->rstart;
1417   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
1418   for (i=0; i<m; i++) {
1419     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1420     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1421   }
1422   aj = Aloc->j;
1423   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}
1424 
1425   /* copy over the B part */
1426   Aloc = (Mat_SeqAIJ*)a->B->data;
1427   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1428   row  = a->rstart;
1429   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1430   ct   = cols;
1431   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1432   for (i=0; i<m; i++) {
1433     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1434     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1435   }
1436   ierr = PetscFree(ct);CHKERRQ(ierr);
1437   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1438   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1439   if (matout) {
1440     *matout = B;
1441   } else {
1442     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1443   }
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1449 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1450 {
1451   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1452   Mat            a = aij->A,b = aij->B;
1453   PetscErrorCode ierr;
1454   PetscInt       s1,s2,s3;
1455 
1456   PetscFunctionBegin;
1457   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1458   if (rr) {
1459     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1460     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1461     /* Overlap communication with computation. */
1462     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1463   }
1464   if (ll) {
1465     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1466     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1467     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1468   }
1469   /* scale  the diagonal block */
1470   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1471 
1472   if (rr) {
1473     /* Do a scatter end and then right scale the off-diagonal block */
1474     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1475     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1476   }
1477 
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1484 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1485 {
1486   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1487   PetscErrorCode ierr;
1488 
1489   PetscFunctionBegin;
1490   if (!a->rank) {
1491     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1492   }
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1498 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1499 {
1500   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1501   PetscErrorCode ierr;
1502 
1503   PetscFunctionBegin;
1504   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1505   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1506   PetscFunctionReturn(0);
1507 }
1508 #undef __FUNCT__
1509 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1510 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1511 {
1512   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1513   PetscErrorCode ierr;
1514 
1515   PetscFunctionBegin;
1516   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatEqual_MPIAIJ"
1522 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1523 {
1524   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1525   Mat            a,b,c,d;
1526   PetscTruth     flg;
1527   PetscErrorCode ierr;
1528 
1529   PetscFunctionBegin;
1530   a = matA->A; b = matA->B;
1531   c = matB->A; d = matB->B;
1532 
1533   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1534   if (flg) {
1535     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1536   }
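  /* all processes must agree: combine the local results with a logical AND */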
1537   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatCopy_MPIAIJ"
1543 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1544 {
1545   PetscErrorCode ierr;
1546   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1547   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1548 
1549   PetscFunctionBegin;
1550   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1551   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1552     /* because of the column compression in the off-processor part of the matrix a->B,
1553        the number of columns in a->B and b->B may be different, hence we cannot call
1554        MatCopy() directly on the two parts. If need be, we can provide a copy more
1555        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1556        then copying the submatrices */
1557     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1558   } else {
1559     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1560     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1561   }
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1567 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1568 {
1569   PetscErrorCode ierr;
1570 
1571   PetscFunctionBegin;
1572   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 #include "petscblaslapack.h"
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatAXPY_MPIAIJ"
1579 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1580 {
1581   PetscErrorCode ierr;
1582   PetscInt       i;
1583   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1584   PetscBLASInt   bnz,one=1;
1585   Mat_SeqAIJ     *x,*y;
1586 
1587   PetscFunctionBegin;
1588   if (str == SAME_NONZERO_PATTERN) {
1589     x = (Mat_SeqAIJ *)xx->A->data;
1590     y = (Mat_SeqAIJ *)yy->A->data;
1591     bnz = (PetscBLASInt)x->nz;
1592     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1593     x = (Mat_SeqAIJ *)xx->B->data;
1594     y = (Mat_SeqAIJ *)yy->B->data;
1595     bnz = (PetscBLASInt)x->nz;
1596     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1597   } else if (str == SUBSET_NONZERO_PATTERN) {
1598     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1599 
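    /* the off-diagonal blocks may compress different column sets, so build
       (or reuse) xtoy, a map from each nonzero of X's B part to its slot
       in Y's B part, and add through that map */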
1600     x = (Mat_SeqAIJ *)xx->B->data;
1601     y = (Mat_SeqAIJ *)yy->B->data;
1602     if (y->xtoy && y->XtoY != xx->B) {
1603       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1604       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1605     }
1606     if (!y->xtoy) { /* get xtoy */
1607       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1608       y->XtoY = xx->B;
1609     }
1610     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1611   } else {
1612     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1613   }
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 /* -------------------------------------------------------------------*/
1618 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1619        MatGetRow_MPIAIJ,
1620        MatRestoreRow_MPIAIJ,
1621        MatMult_MPIAIJ,
1622 /* 4*/ MatMultAdd_MPIAIJ,
1623        MatMultTranspose_MPIAIJ,
1624        MatMultTransposeAdd_MPIAIJ,
1625        0,
1626        0,
1627        0,
1628 /*10*/ 0,
1629        0,
1630        0,
1631        MatRelax_MPIAIJ,
1632        MatTranspose_MPIAIJ,
1633 /*15*/ MatGetInfo_MPIAIJ,
1634        MatEqual_MPIAIJ,
1635        MatGetDiagonal_MPIAIJ,
1636        MatDiagonalScale_MPIAIJ,
1637        MatNorm_MPIAIJ,
1638 /*20*/ MatAssemblyBegin_MPIAIJ,
1639        MatAssemblyEnd_MPIAIJ,
1640        0,
1641        MatSetOption_MPIAIJ,
1642        MatZeroEntries_MPIAIJ,
1643 /*25*/ MatZeroRows_MPIAIJ,
1644        0,
1645        0,
1646        0,
1647        0,
1648 /*30*/ MatSetUpPreallocation_MPIAIJ,
1649        0,
1650        0,
1651        0,
1652        0,
1653 /*35*/ MatDuplicate_MPIAIJ,
1654        0,
1655        0,
1656        0,
1657        0,
1658 /*40*/ MatAXPY_MPIAIJ,
1659        MatGetSubMatrices_MPIAIJ,
1660        MatIncreaseOverlap_MPIAIJ,
1661        MatGetValues_MPIAIJ,
1662        MatCopy_MPIAIJ,
1663 /*45*/ MatPrintHelp_MPIAIJ,
1664        MatScale_MPIAIJ,
1665        0,
1666        0,
1667        0,
1668 /*50*/ MatSetBlockSize_MPIAIJ,
1669        0,
1670        0,
1671        0,
1672        0,
1673 /*55*/ MatFDColoringCreate_MPIAIJ,
1674        0,
1675        MatSetUnfactored_MPIAIJ,
1676        0,
1677        0,
1678 /*60*/ MatGetSubMatrix_MPIAIJ,
1679        MatDestroy_MPIAIJ,
1680        MatView_MPIAIJ,
1681        MatGetPetscMaps_Petsc,
1682        0,
1683 /*65*/ 0,
1684        0,
1685        0,
1686        0,
1687        0,
1688 /*70*/ 0,
1689        0,
1690        MatSetColoring_MPIAIJ,
1691 #if defined(PETSC_HAVE_ADIC)
1692        MatSetValuesAdic_MPIAIJ,
1693 #else
1694        0,
1695 #endif
1696        MatSetValuesAdifor_MPIAIJ,
1697 /*75*/ 0,
1698        0,
1699        0,
1700        0,
1701        0,
1702 /*80*/ 0,
1703        0,
1704        0,
1705        0,
1706 /*84*/ MatLoad_MPIAIJ,
1707        0,
1708        0,
1709        0,
1710        0,
1711        0,
1712 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1713        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1714        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1715        MatPtAP_Basic,
1716        MatPtAPSymbolic_MPIAIJ,
1717 /*95*/ MatPtAPNumeric_MPIAIJ,
1718        0,
1719        0,
1720        0,
1721        0,
1722 /*100*/0,
1723        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1724        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1725 };
1726 
1727 /* ----------------------------------------------------------------------------------------*/
1728 
1729 EXTERN_C_BEGIN
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1732 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1733 {
1734   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1735   PetscErrorCode ierr;
1736 
1737   PetscFunctionBegin;
1738   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1739   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1740   PetscFunctionReturn(0);
1741 }
1742 EXTERN_C_END
1743 
1744 EXTERN_C_BEGIN
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1747 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1748 {
1749   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1750   PetscErrorCode ierr;
1751 
1752   PetscFunctionBegin;
1753   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1754   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1755   PetscFunctionReturn(0);
1756 }
1757 EXTERN_C_END
1758 
1759 #include "petscpc.h"
1760 EXTERN_C_BEGIN
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1763 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1764 {
1765   Mat_MPIAIJ     *b;
1766   PetscErrorCode ierr;
1767   PetscInt       i;
1768 
1769   PetscFunctionBegin;
1770   B->preallocated = PETSC_TRUE;
1771   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1772   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1773   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1774   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1775   if (d_nnz) {
1776     for (i=0; i<B->m; i++) {
1777       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1778     }
1779   }
1780   if (o_nnz) {
1781     for (i=0; i<B->m; i++) {
1782       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1783     }
1784   }
1785   b = (Mat_MPIAIJ*)B->data;
1786   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1787   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1788 
1789   PetscFunctionReturn(0);
1790 }
1791 EXTERN_C_END
1792 
1793 #undef __FUNCT__
1794 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1795 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1796 {
1797   Mat            mat;
1798   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1799   PetscErrorCode ierr;
1800 
1801   PetscFunctionBegin;
1802   *newmat       = 0;
1803   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1804   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1805   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1806   a    = (Mat_MPIAIJ*)mat->data;
1807 
1808   mat->factor       = matin->factor;
1809   mat->bs           = matin->bs;
1810   mat->assembled    = PETSC_TRUE;
1811   mat->insertmode   = NOT_SET_VALUES;
1812   mat->preallocated = PETSC_TRUE;
1813 
1814   a->rstart       = oldmat->rstart;
1815   a->rend         = oldmat->rend;
1816   a->cstart       = oldmat->cstart;
1817   a->cend         = oldmat->cend;
1818   a->size         = oldmat->size;
1819   a->rank         = oldmat->rank;
1820   a->donotstash   = oldmat->donotstash;
1821   a->roworiented  = oldmat->roworiented;
1822   a->rowindices   = 0;
1823   a->rowvalues    = 0;
1824   a->getrowactive = PETSC_FALSE;
1825 
1826   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1827   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1828   if (oldmat->colmap) {
1829 #if defined (PETSC_USE_CTABLE)
1830     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1831 #else
1832     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1833     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1834     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1835 #endif
1836   } else a->colmap = 0;
1837   if (oldmat->garray) {
1838     PetscInt len;
1839     len  = oldmat->B->n;
1840     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1841     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1842     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1843   } else a->garray = 0;
1844 
1845   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1846   PetscLogObjectParent(mat,a->lvec);
1847   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1848   PetscLogObjectParent(mat,a->Mvctx);
1849   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1850   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1851   PetscLogObjectParent(mat,a->A);
1852   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1853   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1854   PetscLogObjectParent(mat,a->B);
1855   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1856   *newmat = mat;
1857   PetscFunctionReturn(0);
1858 }
1859 
1860 #include "petscsys.h"
1861 
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatLoad_MPIAIJ"
1864 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1865 {
1866   Mat            A;
1867   PetscScalar    *vals,*svals;
1868   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1869   MPI_Status     status;
1870   PetscErrorCode ierr;
1871   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1872   PetscInt       i,nz,j,rstart,rend;
1873   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1874   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1875   PetscInt       cend,cstart,n,*rowners;
1876   int            fd;
1877 
1878   PetscFunctionBegin;
1879   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1880   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1881   if (!rank) {
1882     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1883     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1884     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1885   }
1886 
1887   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1888   M = header[1]; N = header[2];
1889   /* determine ownership of all rows */
1890   m    = M/size + ((M % size) > rank);
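  /* each process gets M/size rows; the first M%size processes get one extra */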
1891   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1892   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1893   rowners[0] = 0;
1894   for (i=2; i<=size; i++) {
1895     rowners[i] += rowners[i-1];
1896   }
1897   rstart = rowners[rank];
1898   rend   = rowners[rank+1];
1899 
1900   /* distribute row lengths to all processors */
1901   ierr    = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
1902   if (!rank) {
1903     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1904     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1905     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1906     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1907     for (j=0; j<m; j++) {
1908       procsnz[0] += ourlens[j];
1909     }
1910     for (i=1; i<size; i++) {
1911       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1912       /* calculate the number of nonzeros on each processor */
1913       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1914         procsnz[i] += rowlengths[j];
1915       }
1916       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1917     }
1918     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1919   } else {
1920     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1921   }
1922 
1923   if (!rank) {
1924     /* determine max buffer needed and allocate it */
1925     maxnz = 0;
1926     for (i=0; i<size; i++) {
1927       maxnz = PetscMax(maxnz,procsnz[i]);
1928     }
1929     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1930 
1931     /* read in my part of the matrix column indices  */
1932     nz   = procsnz[0];
1933     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1934     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1935 
1936     /* read in everyone else's part and ship it off */
1937     for (i=1; i<size; i++) {
1938       nz   = procsnz[i];
1939       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1940       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1941     }
1942     ierr = PetscFree(cols);CHKERRQ(ierr);
1943   } else {
1944     /* determine buffer space needed for message */
1945     nz = 0;
1946     for (i=0; i<m; i++) {
1947       nz += ourlens[i];
1948     }
1949     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1950 
1951     /* receive message of column indices */
1952     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1953     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1954     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1955   }
1956 
1957   /* determine column ownership if matrix is not square */
1958   if (N != M) {
1959     n      = N/size + ((N % size) > rank);
1960     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1961     cstart = cend - n;
1962   } else {
1963     cstart = rstart;
1964     cend   = rend;
1965     n      = cend - cstart;
1966   }
1967 
1968   /* loop over local rows, determining number of off diagonal entries */
1969   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1970   jj = 0;
1971   for (i=0; i<m; i++) {
1972     for (j=0; j<ourlens[i]; j++) {
1973       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1974       jj++;
1975     }
1976   }
1977 
1978   /* create our matrix */
1979   for (i=0; i<m; i++) {
1980     ourlens[i] -= offlens[i];
1981   }
1982   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
1983   ierr = MatSetType(A,type);CHKERRQ(ierr);
1984   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
1985 
1986   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1987   for (i=0; i<m; i++) {
1988     ourlens[i] += offlens[i];
1989   }
1990 
1991   if (!rank) {
1992     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1993 
1994     /* read in my part of the matrix numerical values  */
1995     nz   = procsnz[0];
1996     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1997 
1998     /* insert into matrix */
1999     jj      = rstart;
2000     smycols = mycols;
2001     svals   = vals;
2002     for (i=0; i<m; i++) {
2003       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2004       smycols += ourlens[i];
2005       svals   += ourlens[i];
2006       jj++;
2007     }
2008 
2009     /* read in other processors and ship out */
2010     for (i=1; i<size; i++) {
2011       nz   = procsnz[i];
2012       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2013       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2014     }
2015     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2016   } else {
2017     /* receive numeric values */
2018     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2019 
2020     /* receive message of values */
2021     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2022     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2023     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2024 
2025     /* insert into matrix */
2026     jj      = rstart;
2027     smycols = mycols;
2028     svals   = vals;
2029     for (i=0; i<m; i++) {
2030       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2031       smycols += ourlens[i];
2032       svals   += ourlens[i];
2033       jj++;
2034     }
2035   }
2036   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2037   ierr = PetscFree(vals);CHKERRQ(ierr);
2038   ierr = PetscFree(mycols);CHKERRQ(ierr);
2039   ierr = PetscFree(rowners);CHKERRQ(ierr);
2040 
2041   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2042   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2043   *newmat = A;
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2049 /*
2050     Not great since it makes two copies of the submatrix: first a SeqAIJ
2051   on each process, and then the end result by concatenating the local
2052   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2053 */
2054 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2055 {
2056   PetscErrorCode ierr;
2057   PetscMPIInt    rank,size;
2058   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2059   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2060   Mat            *local,M,Mreuse;
2061   PetscScalar    *vwork,*aa;
2062   MPI_Comm       comm = mat->comm;
2063   Mat_SeqAIJ     *aij;
2064 
2065 
2066   PetscFunctionBegin;
2067   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2068   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2069 
2070   if (call ==  MAT_REUSE_MATRIX) {
2071     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2072     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2073     local = &Mreuse;
2074     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2075   } else {
2076     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2077     Mreuse = *local;
2078     ierr   = PetscFree(local);CHKERRQ(ierr);
2079   }
2080 
2081   /*
2082       m - number of local rows
2083       n - number of columns (same on all processors)
2084       rstart - first row in new global matrix generated
2085   */
2086   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2087   if (call == MAT_INITIAL_MATRIX) {
2088     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2089     ii  = aij->i;
2090     jj  = aij->j;
2091 
2092     /*
2093         Determine the number of non-zeros in the diagonal and off-diagonal
2094         portions of the matrix in order to do correct preallocation
2095     */
2096 
2097     /* first get start and end of "diagonal" columns */
2098     if (csize == PETSC_DECIDE) {
2099       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2100       if (mglobal == n) { /* square matrix */
2101         nlocal = m;
2102       } else {
2103         nlocal = n/size + ((n % size) > rank);
2104       }
2105     } else {
2106       nlocal = csize;
2107     }
2108     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2109     rstart = rend - nlocal;
2110     if (rank == size - 1 && rend != n) {
2111       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2112     }
2113 
2114     /* next, compute all the lengths */
2115     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2116     olens = dlens + m;
2117     for (i=0; i<m; i++) {
2118       jend = ii[i+1] - ii[i];
2119       olen = 0;
2120       dlen = 0;
2121       for (j=0; j<jend; j++) {
2122         if (*jj < rstart || *jj >= rend) olen++;
2123         else dlen++;
2124         jj++;
2125       }
2126       olens[i] = olen;
2127       dlens[i] = dlen;
2128     }
2129     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2130     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2131     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2132     ierr = PetscFree(dlens);CHKERRQ(ierr);
2133   } else {
2134     PetscInt ml,nl;
2135 
2136     M = *newmat;
2137     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2138     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2139     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2140     /*
2141          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2142        rather than the slower MatSetValues().
2143     */
2144     M->was_assembled = PETSC_TRUE;
2145     M->assembled     = PETSC_FALSE;
2146   }
2147   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2148   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2149   ii  = aij->i;
2150   jj  = aij->j;
2151   aa  = aij->a;
2152   for (i=0; i<m; i++) {
2153     row   = rstart + i;
2154     nz    = ii[i+1] - ii[i];
2155     cwork = jj;     jj += nz;
2156     vwork = aa;     aa += nz;
2157     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2158   }
2159 
2160   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2161   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2162   *newmat = M;
2163 
2164   /* save submatrix used in processor for next request */
2165   if (call ==  MAT_INITIAL_MATRIX) {
2166     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2167     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2168   }
2169 
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 EXTERN_C_BEGIN
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2176 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2177 {
2178   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2179   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2180   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2181   const PetscInt *JJ;
2182   PetscScalar    *values;
2183   PetscErrorCode ierr;
2184 
2185   PetscFunctionBegin;
2186 #if defined(PETSC_OPT_g)
2187   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2188 #endif
2189   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2190   o_nnz = d_nnz + m;
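  /* for each local row, count the columns inside [cstart,cend) (the diagonal
     block) and outside it (the off-diagonal block); each row of J is sorted,
     so a single forward scan per row suffices */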
2191 
2192   for (i=0; i<m; i++) {
2193     nnz     = I[i+1]- I[i];
2194     JJ      = J + I[i];
2195     nnz_max = PetscMax(nnz_max,nnz);
2196 #if defined(PETSC_OPT_g)
2197     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2198 #endif
2199     for (j=0; j<nnz; j++) {
2200       if (*JJ >= cstart) break;
2201       JJ++;
2202     }
2203     d = 0;
2204     for (; j<nnz; j++) {
2205       if (*JJ++ >= cend) break;
2206       d++;
2207     }
2208     d_nnz[i] = d;
2209     o_nnz[i] = nnz - d;
2210   }
2211   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2212   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2213 
2214   if (v) values = (PetscScalar*)v;
2215   else {
2216     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2217     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2218   }
2219 
2220   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2221   for (i=0; i<m; i++) {
2222     ii   = i + rstart;
2223     nnz  = I[i+1]- I[i];
2224     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2225   }
2226   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2227   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2228   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2229 
2230   if (!v) {
2231     ierr = PetscFree(values);CHKERRQ(ierr);
2232   }
2233   PetscFunctionReturn(0);
2234 }
2235 EXTERN_C_END
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2239 /*@C
2240    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2241    (the default parallel PETSc format).
2242 
2243    Collective on MPI_Comm
2244 
2245    Input Parameters:
2246 +  B - the matrix
2247 .  i - the indices into j for the start of each local row (starts with zero)
2248 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2249 -  v - optional values in the matrix
2250 
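   Example usage (a minimal sketch; the two-local-row CSR arrays here are
   purely illustrative):
.vb
      PetscInt    i[] = {0,2,3};        /* row r uses j[i[r]] .. j[i[r+1]-1] */
      PetscInt    j[] = {0,5,3};        /* sorted global columns within each row */
      PetscScalar v[] = {1.0,2.0,3.0};
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
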
2251    Level: developer
2252 
2253 .keywords: matrix, aij, compressed row, sparse, parallel
2254 
2255 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2256 @*/
2257 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2258 {
2259   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2260 
2261   PetscFunctionBegin;
2262   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2263   if (f) {
2264     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2265   }
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2271 /*@C
2272    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2273    (the default parallel PETSc format).  For good matrix assembly performance
2274    the user should preallocate the matrix storage by setting the parameters
2275    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2276    performance can be increased by more than a factor of 50.
2277 
2278    Collective on MPI_Comm
2279 
2280    Input Parameters:
2281 +  A - the matrix
2282 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2283            (same value is used for all local rows)
2284 .  d_nnz - array containing the number of nonzeros in the various rows of the
2285            DIAGONAL portion of the local submatrix (possibly different for each row)
2286            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2287            The size of this array is equal to the number of local rows, i.e., 'm'.
2288            You must leave room for the diagonal entry even if it is zero.
2289 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2290            submatrix (same value is used for all local rows).
2291 -  o_nnz - array containing the number of nonzeros in the various rows of the
2292            OFF-DIAGONAL portion of the local submatrix (possibly different for
2293            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2294            structure. The size of this array is equal to the number
2295            of local rows, i.e., 'm'.
2296 
2297    If the *_nnz parameter is given then the *_nz parameter is ignored
2298 
2299    The AIJ format (also called the Yale sparse matrix format or
2300    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2301    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2302 
2303    The parallel matrix is partitioned such that the first m0 rows belong to
2304    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2305    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2306 
2307    The DIAGONAL portion of the local submatrix of a processor can be defined
2308    as the submatrix which is obtained by extracting the part corresponding
2309    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2310    first row that belongs to the processor, and r2 is the last row belonging
2311    to this processor. This is a square mxm matrix. The remaining portion
2312    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2313 
2314    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2315 
2316    Example usage:
2317 
2318    Consider the following 8x8 matrix with 34 non-zero values that is
2319    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2320    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2321    as follows:
2322 
2323 .vb
2324             1  2  0  |  0  3  0  |  0  4
2325     Proc0   0  5  6  |  7  0  0  |  8  0
2326             9  0 10  | 11  0  0  | 12  0
2327     -------------------------------------
2328            13  0 14  | 15 16 17  |  0  0
2329     Proc1   0 18  0  | 19 20 21  |  0  0
2330             0  0  0  | 22 23  0  | 24  0
2331     -------------------------------------
2332     Proc2  25 26 27  |  0  0 28  | 29  0
2333            30  0  0  | 31 32 33  |  0 34
2334 .ve
2335 
2336    This can be represented as a collection of submatrices as:
2337 
2338 .vb
2339       A B C
2340       D E F
2341       G H I
2342 .ve
2343 
2344    Where the submatrices A,B,C are owned by proc0, D,E,F are
2345    owned by proc1, G,H,I are owned by proc2.
2346 
2347    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2348    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2349    The 'M','N' parameters are 8,8, and have the same values on all procs.
2350 
2351    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2352    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2353    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2354    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2355    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2356    matrix, and [DF] as another SeqAIJ matrix.
2357 
2358    When d_nz, o_nz parameters are specified, d_nz storage elements are
2359    allocated for every row of the local diagonal submatrix, and o_nz
2360    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2361    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2362    local row in each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2363    In this case, the values of d_nz,o_nz are:
2364 .vb
2365      proc0 : d_nz = 2, o_nz = 2
2366      proc1 : d_nz = 3, o_nz = 2
2367      proc2 : d_nz = 1, o_nz = 4
2368 .ve
2369    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2370    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2371    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2372    34 values.
2373 
2374    When d_nnz, o_nnz parameters are specified, the storage is specified
2375    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2376    In the above case the values for d_nnz,o_nnz are:
2377 .vb
2378      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2379      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2380      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2381 .ve
2382    Here the space allocated is the sum of all the above values, i.e., 34, and
2383    hence the preallocation is perfect.
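
   With those values, the proc1 call would be the following sketch (the
   matrix A is assumed to already have its type and sizes set):
.vb
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve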
2384 
2385    Level: intermediate
2386 
2387 .keywords: matrix, aij, compressed row, sparse, parallel
2388 
2389 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2390           MPIAIJ
2391 @*/
2392 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2393 {
2394   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2395 
2396   PetscFunctionBegin;
2397   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2398   if (f) {
2399     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2400   }
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatCreateMPIAIJ"
2406 /*@C
2407    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2408    (the default parallel PETSc format).  For good matrix assembly performance
2409    the user should preallocate the matrix storage by setting the parameters
2410    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2411    performance can be increased by more than a factor of 50.
2412 
2413    Collective on MPI_Comm
2414 
2415    Input Parameters:
2416 +  comm - MPI communicator
2417 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2418            This value should be the same as the local size used in creating the
2419            y vector for the matrix-vector product y = Ax.
2420 .  n - This value should be the same as the local size used in creating the
2421        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2422        it calculated if N is given). For square matrices n is almost always m.
2423 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2424 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2425 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2426            (same value is used for all local rows)
2427 .  d_nnz - array containing the number of nonzeros in the various rows of the
2428            DIAGONAL portion of the local submatrix (possibly different for each row)
2429            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2430            The size of this array is equal to the number of local rows, i.e., 'm'.
2431            You must leave room for the diagonal entry even if it is zero.
2432 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2433            submatrix (same value is used for all local rows).
2434 -  o_nnz - array containing the number of nonzeros in the various rows of the
2435            OFF-DIAGONAL portion of the local submatrix (possibly different for
2436            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2437            structure. The size of this array is equal to the number
2438            of local rows, i.e., 'm'.
2439 
2440    Output Parameter:
2441 .  A - the matrix
2442 
2443    Notes:
2444    If the *_nnz parameter is given then the *_nz parameter is ignored
2445 
2446    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2447    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2448    storage requirements for this matrix.
2449 
2450    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2451    processor then it must be used on all processors that share the object for
2452    that argument.
2453 
2454    The user MUST specify either the local or global matrix dimensions
2455    (possibly both).
2456 
2457    The parallel matrix is partitioned such that the first m0 rows belong to
2458    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2459    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2460 
2461    The DIAGONAL portion of the local submatrix of a processor can be defined
2462    as the submatrix which is obtained by extracting the part corresponding
2463    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2464    first row that belongs to the processor, and r2 is the last row belonging
2465    to this processor. This is a square mxm matrix. The remaining portion
2466    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2467 
2468    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
2469 
2470    When calling this routine with a single process communicator, a matrix of
2471    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2472    type of communicator, use the construction mechanism:
2473      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2474 
2475    By default, this format uses inodes (identical nodes) when possible.
2476    We search for consecutive rows with the same nonzero structure, thereby
2477    reusing matrix information to achieve increased efficiency.
2478 
2479    Options Database Keys:
2480 +  -mat_aij_no_inode  - Do not use inodes
2481 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2482 -  -mat_aij_oneindex - Internally use indexing starting at 1
2483         rather than 0.  Note that when calling MatSetValues(),
2484         the user still MUST index entries starting at 0!
2485 
2486 
2487    Example usage:
2488 
2489    Consider the following 8x8 matrix with 34 non-zero values that is
2490    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2491    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2492    as follows:
2493 
2494 .vb
2495             1  2  0  |  0  3  0  |  0  4
2496     Proc0   0  5  6  |  7  0  0  |  8  0
2497             9  0 10  | 11  0  0  | 12  0
2498     -------------------------------------
2499            13  0 14  | 15 16 17  |  0  0
2500     Proc1   0 18  0  | 19 20 21  |  0  0
2501             0  0  0  | 22 23  0  | 24  0
2502     -------------------------------------
2503     Proc2  25 26 27  |  0  0 28  | 29  0
2504            30  0  0  | 31 32 33  |  0 34
2505 .ve
2506 
2507    This can be represented as a collection of submatrices as:
2508 
2509 .vb
2510       A B C
2511       D E F
2512       G H I
2513 .ve
2514 
2515    Where the submatrices A,B,C are owned by proc0, D,E,F are
2516    owned by proc1, G,H,I are owned by proc2.
2517 
2518    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2519    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2520    The 'M','N' parameters are 8,8, and have the same values on all procs.
2521 
2522    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2523    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2524    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2525    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2526    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2527    matrix, and [DF] as another SeqAIJ matrix.
2528 
2529    When d_nz, o_nz parameters are specified, d_nz storage elements are
2530    allocated for every row of the local diagonal submatrix, and o_nz
2531    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2532    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2533    local row in each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2534    In this case, the values of d_nz,o_nz are:
2535 .vb
2536      proc0 : d_nz = 2, o_nz = 2
2537      proc1 : d_nz = 3, o_nz = 2
2538      proc2 : d_nz = 1, o_nz = 4
2539 .ve
2540    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2541    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2542    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2543    34 values.
2544 
2545    When d_nnz, o_nnz parameters are specified, the storage is specified
2546    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2547    In the above case the values for d_nnz,o_nnz are:
2548 .vb
2549      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2550      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2551      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2552 .ve
2553    Here the space allocated is the sum of all the above values, i.e., 34, and
2554    hence the preallocation is perfect.
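
   For this example, proc1 could thus create its share of the matrix with
   the following sketch (comm is the shared communicator):
.vb
      PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve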
2555 
2556    Level: intermediate
2557 
2558 .keywords: matrix, aij, compressed row, sparse, parallel
2559 
2560 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2561           MPIAIJ
2562 @*/
2563 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2564 {
2565   PetscErrorCode ierr;
2566   PetscMPIInt    size;
2567 
2568   PetscFunctionBegin;
2569   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2570   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2571   if (size > 1) {
2572     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2573     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2574   } else {
2575     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2576     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2577   }
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 #undef __FUNCT__
2582 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2583 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2584 {
2585   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2586 
2587   PetscFunctionBegin;
2588   *Ad     = a->A;
2589   *Ao     = a->B;
2590   *colmap = a->garray;
2591   PetscFunctionReturn(0);
2592 }
2593 
2594 #undef __FUNCT__
2595 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2596 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2597 {
2598   PetscErrorCode ierr;
2599   PetscInt       i;
2600   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2601 
2602   PetscFunctionBegin;
2603   if (coloring->ctype == IS_COLORING_LOCAL) {
2604     ISColoringValue *allcolors,*colors;
2605     ISColoring      ocoloring;
2606 
2607     /* set coloring for diagonal portion */
2608     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2609 
2610     /* set coloring for off-diagonal portion */
2611     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2612     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2613     for (i=0; i<a->B->n; i++) {
2614       colors[i] = allcolors[a->garray[i]];
2615     }
2616     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2617     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2618     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2619     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2620   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2621     ISColoringValue *colors;
2622     PetscInt        *larray;
2623     ISColoring      ocoloring;
2624 
2625     /* set coloring for diagonal portion */
2626     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2627     for (i=0; i<a->A->n; i++) {
2628       larray[i] = i + a->cstart;
2629     }
2630     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2631     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2632     for (i=0; i<a->A->n; i++) {
2633       colors[i] = coloring->colors[larray[i]];
2634     }
2635     ierr = PetscFree(larray);CHKERRQ(ierr);
2636     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2637     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2638     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2639 
2640     /* set coloring for off-diagonal portion */
2641     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2642     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2643     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2644     for (i=0; i<a->B->n; i++) {
2645       colors[i] = coloring->colors[larray[i]];
2646     }
2647     ierr = PetscFree(larray);CHKERRQ(ierr);
2648     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2649     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2650     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2651   } else {
2652     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
2653   }
2654 
2655   PetscFunctionReturn(0);
2656 }
2657 
2658 #if defined(PETSC_HAVE_ADIC)
2659 #undef __FUNCT__
2660 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2661 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2662 {
2663   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2664   PetscErrorCode ierr;
2665 
2666   PetscFunctionBegin;
2667   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2668   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2669   PetscFunctionReturn(0);
2670 }
2671 #endif
2672 
2673 #undef __FUNCT__
2674 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2675 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2676 {
2677   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2678   PetscErrorCode ierr;
2679 
2680   PetscFunctionBegin;
2681   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2682   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2683   PetscFunctionReturn(0);
2684 }
2685 
2686 #undef __FUNCT__
2687 #define __FUNCT__ "MatMerge"
2688 /*@C
2689       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2690                  matrices from each processor
2691 
2692     Collective on MPI_Comm
2693 
2694    Input Parameters:
2695 +    comm - the communicator the parallel matrix will live on
2696 .    inmat - the input sequential matrix on this process
2697 .    n - number of local columns (or PETSC_DECIDE)
2698 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2699 
2700    Output Parameter:
2701 .    outmat - the parallel matrix generated
2702 
2703     Level: advanced
2704 
2705    Notes: The number of columns of the matrix on EACH processor MUST be the same.
   This routine consumes inmat; MatDestroy() is called on it once its values
   have been inserted into outmat.
2706 
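   Example usage (a minimal sketch; Aseq is a sequential matrix each process
   has already assembled):
.vb
      ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
.ve
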
2707 @*/
2708 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2709 {
2710   PetscErrorCode ierr;
2711   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2712   PetscInt       *indx;
2713   PetscScalar    *values;
2714   PetscMap       columnmap,rowmap;
2715 
2716   PetscFunctionBegin;
2717   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2723   if (scall == MAT_INITIAL_MATRIX){
2724     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2725     if (n == PETSC_DECIDE){
2726       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2727       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2728       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2729       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2730       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2731     }
2732 
2733     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2734     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2735     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2736     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2737     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2738 
2739     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2740     for (i=0;i<m;i++) {
2741       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2742       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2743       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2744     }
2745     /* This routine will ONLY return an MPIAIJ type matrix */
2746     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2747     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2748     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2749     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2750 
2751   } else if (scall == MAT_REUSE_MATRIX){
2752     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2753   } else {
2754     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2755   }
2756 
2757   for (i=0;i<m;i++) {
2758     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2759     I    = i + rstart;
2760     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2761     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2762   }
2763   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2764   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2765   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2766 
2767   PetscFunctionReturn(0);
2768 }
2769 
2770 #undef __FUNCT__
2771 #define __FUNCT__ "MatFileSplit"
2772 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2773 {
2774   PetscErrorCode    ierr;
2775   PetscMPIInt       rank;
2776   PetscInt          m,N,i,rstart,nnz;
2777   size_t            len;
2778   const PetscInt    *indx;
2779   PetscViewer       out;
2780   char              *name;
2781   Mat               B;
2782   const PetscScalar *values;
2783 
2784   PetscFunctionBegin;
2785   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2786   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2787   /* Should this be the type of the diagonal block of A? */
2788   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2789   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2790   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2791   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2792   for (i=0;i<m;i++) {
2793     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2794     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2795     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2796   }
2797   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2798   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2799 
2800   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2801   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2802   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2803   sprintf(name,"%s.%d",outfile,rank);
2804   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2805   ierr = PetscFree(name);CHKERRQ(ierr);
2806   ierr = MatView(B,out);CHKERRQ(ierr);
2807   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2808   ierr = MatDestroy(B);CHKERRQ(ierr);
2809   PetscFunctionReturn(0);
2810 }
2811 
2812 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2813 #undef __FUNCT__
2814 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2815 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2816 {
2817   PetscErrorCode       ierr;
2818   Mat_Merge_SeqsToMPI  *merge;
2819   PetscObjectContainer container;
2820 
2821   PetscFunctionBegin;
2822   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2823   if (container) {
2824     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2825     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2826     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2827     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2828     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2829     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2830     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2831     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2832     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2833     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2834     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2835     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2836 
2837     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2838     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2839     ierr = PetscFree(merge);CHKERRQ(ierr);
2840   }
2841 
2842   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2843   PetscFunctionReturn(0);
2844 }
2845 
2846 #include "src/mat/utils/freespace.h"
2847 #include "petscbt.h"
2848 #undef __FUNCT__
2849 #define __FUNCT__ "MatMerge_SeqsToMPI"
2850 /*@C
2851       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by summing the entries of
2852                  sequential matrices from each processor
2853 
2854     Collective on MPI_Comm
2855 
2856    Input Parameters:
2857 +    comm - the communicator the parallel matrix will live on
2858 .    seqmat - the input sequential matrix (one per process)
2859 .    m - number of local rows (or PETSC_DECIDE)
2860 .    n - number of local columns (or PETSC_DECIDE)
2861 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2862 
2863    Output Parameter:
2864 .    mpimat - the parallel matrix generated
2865 
2866     Level: advanced
2867 
2868    Notes:
2869      The dimensions of the sequential matrix on each processor MUST be the same.
2870      The supporting merge data is attached to mpimat in a PetscObjectContainer named
2871      "MatMergeSeqsToMPI" and is freed when mpimat is destroyed; call PetscObjectQuery() to access it.
2872 @*/
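/*
   Usage sketch (hypothetical; "seqmat" stands for a SeqAIJ matrix of identical size on
   every process whose entries are to be summed into one MPIAIJ matrix):

      Mat C;
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ...                       refill seqmat with new values (same nonzero pattern), then
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);

   The MAT_REUSE_MATRIX call skips the symbolic phase and goes straight to
   MatMerge_SeqsToMPINumeric() below.
*/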
2873 static PetscEvent logkey_seqstompinum = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2874 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2875 {
2876   PetscErrorCode       ierr;
2877   MPI_Comm             comm=mpimat->comm;
2878   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2879   PetscMPIInt          size,rank,taga,*len_s;
2880   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2881   PetscInt             proc,m;
2882   PetscInt             **buf_ri,**buf_rj;
2883   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2884   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2885   MPI_Request          *s_waits,*r_waits;
2886   MPI_Status           *status;
2887   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2888   Mat_Merge_SeqsToMPI  *merge;
2889   PetscObjectContainer container;
2890 
2891   PetscFunctionBegin;
2892   if (!logkey_seqstompinum) {
2893     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2894   }
2895   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2896 
2897   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2898   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2899 
2900   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2901   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat lacks the MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
2902   ierr   = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2904   bi     = merge->bi;
2905   bj     = merge->bj;
2906   buf_ri = merge->buf_ri;
2907   buf_rj = merge->buf_rj;
2908 
2909   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2910   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2911   len_s  = merge->len_s;
2912 
2913   /* send and recv matrix values */
2914   /*-----------------------------*/
2915   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2916   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2917 
2918   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2919   for (proc=0,k=0; proc<size; proc++){
2920     if (!len_s[proc]) continue;
2921     i = owners[proc];
2922     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2923     k++;
2924   }
2925 
2926   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2927   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2928   ierr = PetscFree(status);CHKERRQ(ierr);
2929 
2930   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2931   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2932 
2933   /* insert mat values of mpimat */
2934   /*----------------------------*/
2935   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2936   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2937   nextrow = buf_ri_k + merge->nrecv;
2938   nextai  = nextrow + merge->nrecv;
2939 
2940   for (k=0; k<merge->nrecv; k++){
2941     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
2942     nrows = *(buf_ri_k[k]);
2943     nextrow[k]  = buf_ri_k[k]+1;  /* next row number in the k-th received i-structure */
2944     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure (row offsets) of the k-th received message */
2945   }
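  /* For example (hypothetical message), if buf_ri[k] = {2, 3, 7, 0, 2, 6} then nrows = 2,
     nextrow[k] starts at the row list {3, 7} and nextai[k] at the i-structure {0, 2, 6};
     both pointers advance as the rows of this message are consumed below. */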
2946 
2947   /* set values of ba */
2948   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2949   for (i=0; i<m; i++) {
2950     arow = owners[rank] + i;
2951     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2952     bnzi = bi[i+1] - bi[i];
2953     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2954 
2955     /* add local non-zero vals of this proc's seqmat into ba */
2956     anzi = ai[arow+1] - ai[arow];
2957     aj   = a->j + ai[arow];
2958     aa   = a->a + ai[arow];
2959     nextaj = 0;
2960     for (j=0; nextaj<anzi; j++){
2961       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2962         ba_i[j] += aa[nextaj++];
2963       }
2964     }
2965 
2966     /* add received vals into ba */
2967     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2968       /* i-th row */
2969       if (i == *nextrow[k]) {
2970         anzi = *(nextai[k]+1) - *nextai[k];
2971         aj   = buf_rj[k] + *(nextai[k]);
2972         aa   = abuf_r[k] + *(nextai[k]);
2973         nextaj = 0;
2974         for (j=0; nextaj<anzi; j++){
2975           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2976             ba_i[j] += aa[nextaj++];
2977           }
2978         }
2979         nextrow[k]++; nextai[k]++;
2980       }
2981     }
2982     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
2983   }
2984   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2985   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2986 
2987   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
2988   ierr = PetscFree(ba_i);CHKERRQ(ierr);
2989   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
2990   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2991   PetscFunctionReturn(0);
2992 }
2993 static PetscEvent logkey_seqstompisym = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
2994 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
2995 {
2996   PetscErrorCode       ierr;
2997   Mat                  B_mpi;
2998   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2999   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3000   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3001   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3002   PetscInt             len,proc,*dnz,*onz;
3003   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3004   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3005   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3006   MPI_Status           *status;
3007   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3008   PetscBT              lnkbt;
3009   Mat_Merge_SeqsToMPI  *merge;
3010   PetscObjectContainer container;
3011 
3012   PetscFunctionBegin;
3013   if (!logkey_seqstompisym) {
3014     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3015   }
3016   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3017 
3018   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3019   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3020 
3021   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3022   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3023 
3024   /* determine row ownership */
3025   /*---------------------------------------------------------*/
3026   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3027   if (m == PETSC_DECIDE) {
3028     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3029   } else {
3030     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3031   }
3032   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3033   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3034   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3035 
3036   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3037   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3038 
3039   /* determine the number of messages to send, their lengths */
3040   /*---------------------------------------------------------*/
3041   len_s  = merge->len_s;
3042 
3043   len = 0;  /* length of buf_si[] */
3044   merge->nsend = 0;
3045   for (proc=0; proc<size; proc++){
3046     len_si[proc] = 0;
3047     if (proc == rank){
3048       len_s[proc] = 0;
3049     } else {
3050       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3051       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3052     }
3053     if (len_s[proc]) {
3054       merge->nsend++;
3055       nrows = 0;
3056       for (i=owners[proc]; i<owners[proc+1]; i++){
3057         if (ai[i+1] > ai[i]) nrows++;
3058       }
3059       len_si[proc] = 2*(nrows+1);
3060       len += len_si[proc];
3061     }
3062   }
3063 
3064   /* determine the number and length of messages to receive for ij-structure */
3065   /*-------------------------------------------------------------------------*/
3066   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3067   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3068 
3069   /* post the Irecv of j-structure */
3070   /*-------------------------------*/
3071   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3072   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3073 
3074   /* post the Isend of j-structure */
3075   /*--------------------------------*/
3076   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3077   sj_waits = si_waits + merge->nsend;
3078 
3079   for (proc=0, k=0; proc<size; proc++){
3080     if (!len_s[proc]) continue;
3081     i = owners[proc];
3082     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3083     k++;
3084   }
3085 
3086   /* receives and sends of j-structure are complete */
3087   /*------------------------------------------------*/
3088   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3089   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3090 
3091   /* send and recv i-structure */
3092   /*---------------------------*/
3093   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3094   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3095 
3096   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3097   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3098   for (proc=0,k=0; proc<size; proc++){
3099     if (!len_s[proc]) continue;
3100     /* form outgoing message for i-structure:
3101          buf_si[0]:                 nrows to be sent
3102                [1:nrows]:           row index (local to the receiving process)
3103                [nrows+1:2*nrows+1]: i-structure index (running nonzero counts)
3104     */
3105     /*-------------------------------------------*/
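    /* For example (hypothetical message), sending two rows that are rows 3 and 7 of the
       receiver with 2 and 4 nonzeros gives buf_si = {2, 3, 7, 0, 2, 6}. */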
3106     nrows = len_si[proc]/2 - 1;
3107     buf_si_i    = buf_si + nrows+1;
3108     buf_si[0]   = nrows;
3109     buf_si_i[0] = 0;
3110     nrows = 0;
3111     for (i=owners[proc]; i<owners[proc+1]; i++){
3112       anzi = ai[i+1] - ai[i];
3113       if (anzi) {
3114         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3115         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3116         nrows++;
3117       }
3118     }
3119     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3120     k++;
3121     buf_si += len_si[proc];
3122   }
3123 
3124   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3125   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3126 
3127   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3128   for (i=0; i<merge->nrecv; i++){
3129     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3130   }
3131 
3132   ierr = PetscFree(len_si);CHKERRQ(ierr);
3133   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3134   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3135   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3136   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3137   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3138   ierr = PetscFree(status);CHKERRQ(ierr);
3139 
3140   /* compute a local seq matrix in each processor */
3141   /*----------------------------------------------*/
3142   /* allocate bi array and free space for accumulating nonzero column info */
3143   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3144   bi[0] = 0;
3145 
3146   /* create and initialize a linked list */
3147   nlnk = N+1;
3148   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3149 
3150   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3151   len  = ai[owners[rank+1]] - ai[owners[rank]];
3153   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3154   current_space = free_space;
3155 
3156   /* determine symbolic info for each local row */
3157   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3158   nextrow = buf_ri_k + merge->nrecv;
3159   nextai  = nextrow + merge->nrecv;
3160   for (k=0; k<merge->nrecv; k++){
3161     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3162     nrows = *buf_ri_k[k];
3163     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number in the k-th received i-structure */
3164     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the i-structure (row offsets) of the k-th received message */
3165   }
3166 
3167   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3168   len = 0;
3169   for (i=0;i<m;i++) {
3170     bnzi   = 0;
3171     /* add local non-zero cols of this proc's seqmat into lnk */
3172     arow   = owners[rank] + i;
3173     anzi   = ai[arow+1] - ai[arow];
3174     aj     = a->j + ai[arow];
3175     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3176     bnzi += nlnk;
3177     /* add received col data into lnk */
3178     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3179       if (i == *nextrow[k]) { /* i-th row */
3180         anzi = *(nextai[k]+1) - *nextai[k];
3181         aj   = buf_rj[k] + *nextai[k];
3182         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3183         bnzi += nlnk;
3184         nextrow[k]++; nextai[k]++;
3185       }
3186     }
3187     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3188 
3189     /* if free space is not available, make more free space */
3190     if (current_space->local_remaining<bnzi) {
3191       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3192       nspacedouble++;
3193     }
3194     /* copy data into free space, then initialize lnk */
3195     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3196     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3197 
3198     current_space->array           += bnzi;
3199     current_space->local_used      += bnzi;
3200     current_space->local_remaining -= bnzi;
3201 
3202     bi[i+1] = bi[i] + bnzi;
3203   }
3204 
3205   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3206 
3207   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3208   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3209   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3210 
3211   /* create symbolic parallel matrix B_mpi */
3212   /*---------------------------------------*/
3213   if (n==PETSC_DECIDE) {
3214     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3215   } else {
3216     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3217   }
3218   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3219   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3220   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3221 
3222   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3223   B_mpi->assembled     = PETSC_FALSE;
3224   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3225   merge->bi            = bi;
3226   merge->bj            = bj;
3227   merge->buf_ri        = buf_ri;
3228   merge->buf_rj        = buf_rj;
3229   merge->coi           = PETSC_NULL;
3230   merge->coj           = PETSC_NULL;
3231   merge->owners_co     = PETSC_NULL;
3232 
3233   /* attach the supporting struct to B_mpi for reuse */
3234   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3235   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3236   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3237   *mpimat = B_mpi;
3238   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3239   PetscFunctionReturn(0);
3240 }
3241 
3242 static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
3243 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3244 {
3245   PetscErrorCode   ierr;
3246 
3247   PetscFunctionBegin;
3248   if (!logkey_seqstompi) {
3249     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3250   }
3251   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3252   if (scall == MAT_INITIAL_MATRIX){
3253     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3254   }
3255   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3256   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3257   PetscFunctionReturn(0);
3258 }
3259 static PetscEvent logkey_getlocalmat = 0;
3260 #undef __FUNCT__
3261 #define __FUNCT__ "MatGetLocalMat"
3262 /*@C
3263      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel matrix
3264 
3265     Not Collective
3266 
3267    Input Parameters:
3268 +    A - the matrix
3269 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3270 
3271    Output Parameter:
3272 .    A_loc - the local sequential matrix generated
3273 
3274     Level: developer
3275 
3276 @*/
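/*
   Usage sketch (hypothetical; A stands for an assembled MPIAIJ matrix):

      Mat Aloc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
      ...                   after the numerical values of A change (same nonzero pattern):
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
      ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/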
3277 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3278 {
3279   PetscErrorCode  ierr;
3280   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3281   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3282   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3283   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3284   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3285   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3286 
3287   PetscFunctionBegin;
3288   if (!logkey_getlocalmat) {
3289     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3290   }
3291   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3292   if (scall == MAT_INITIAL_MATRIX){
3293     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3294     ci[0] = 0;
3295     for (i=0; i<am; i++){
3296       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3297     }
3298     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3299     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3300     k = 0;
3301     for (i=0; i<am; i++) {
3302       ncols_o = bi[i+1] - bi[i];
3303       ncols_d = ai[i+1] - ai[i];
3304       /* off-diagonal portion of A: columns to the left of the diagonal block */
3305       for (jo=0; jo<ncols_o; jo++) {
3306         col = cmap[*bj];
3307         if (col >= cstart) break;
3308         cj[k]   = col; bj++;
3309         ca[k++] = *ba++;
3310       }
3311       /* diagonal portion of A */
3312       for (j=0; j<ncols_d; j++) {
3313         cj[k]   = cstart + *aj++;
3314         ca[k++] = *aa++;
3315       }
3316       /* off-diagonal portion of A: columns to the right of the diagonal block */
3317       for (j=jo; j<ncols_o; j++) {
3318         cj[k]   = cmap[*bj++];
3319         ca[k++] = *ba++;
3320       }
3321     }
3322     /* put together the new matrix */
3323     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3324     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3325     /* Since these are PETSc arrays, change flags to free them as necessary. */
3326     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3327     mat->freedata = PETSC_TRUE;
3328     mat->nonew    = 0;
3329   } else if (scall == MAT_REUSE_MATRIX){
3330     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3331     ci = mat->i; cj = mat->j; ca = mat->a;
3332     for (i=0; i<am; i++) {
3333       /* off-diagonal portion of A: columns to the left of the diagonal block */
3334       ncols_o = bi[i+1] - bi[i];
3335       for (jo=0; jo<ncols_o; jo++) {
3336         col = cmap[*bj];
3337         if (col >= cstart) break;
3338         *ca++ = *ba++; bj++;
3339       }
3340       /* diagonal portion of A */
3341       ncols_d = ai[i+1] - ai[i];
3342       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3343       /* off-diagonal portion of A: columns to the right of the diagonal block */
3344       for (j=jo; j<ncols_o; j++) {
3345         *ca++ = *ba++; bj++;
3346       }
3347     }
3348   } else {
3349     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3350   }
3351 
3352   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 static PetscEvent logkey_getlocalmatcondensed = 0;
3357 #undef __FUNCT__
3358 #define __FUNCT__ "MatGetLocalMatCondensed"
3359 /*@C
3360      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and all the NON-ZERO columns of a parallel matrix
3361 
3362     Not Collective
3363 
3364    Input Parameters:
3365 +    A - the matrix
3366 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3367 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3368 
3369    Output Parameter:
3370 .    A_loc - the local sequential matrix generated
3371 
3372     Level: developer
3373 
3374 @*/
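/*
   Usage sketch (hypothetical; A stands for an assembled MPIAIJ matrix; passing PETSC_NULL
   for row and col selects all local rows and all columns with a nonzero in them):

      Mat Aloc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
*/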
3375 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3376 {
3377   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3378   PetscErrorCode    ierr;
3379   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3380   IS                isrowa,iscola;
3381   Mat               *aloc;
3382 
3383   PetscFunctionBegin;
3384   if (!logkey_getlocalmatcondensed) {
3385     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3386   }
3387   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3388   if (!row){
3389     start = a->rstart; end = a->rend;
3390     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3391   } else {
3392     isrowa = *row;
3393   }
3394   if (!col){
3395     start = a->cstart;
3396     cmap  = a->garray;
3397     nzA   = a->A->n;
3398     nzB   = a->B->n;
3399     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3400     ncols = 0;
3401     for (i=0; i<nzB; i++) {
3402       if (cmap[i] < start) idx[ncols++] = cmap[i];
3403       else break;
3404     }
3405     imark = i;
3406     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3407     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3408     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3409     ierr = PetscFree(idx);CHKERRQ(ierr);
3410   } else {
3411     iscola = *col;
3412   }
3413   if (scall != MAT_INITIAL_MATRIX){
3414     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3415     aloc[0] = *A_loc;
3416   }
3417   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3418   *A_loc = aloc[0];
3419   ierr = PetscFree(aloc);CHKERRQ(ierr);
3420   if (!row){
3421     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3422   }
3423   if (!col){
3424     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3425   }
3426   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3427   PetscFunctionReturn(0);
3428 }
3429 
3430 static PetscEvent logkey_GetBrowsOfAcols = 0;
3431 #undef __FUNCT__
3432 #define __FUNCT__ "MatGetBrowsOfAcols"
3433 /*@C
3434     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
3435 
3436     Collective on Mat
3437 
3438    Input Parameters:
3439 +    A,B - the matrices in mpiaij format
3440 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3441 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3442 
3443    Output Parameter:
3444 +    rowb, colb - index sets of rows and columns of B to extract
3445 .    brstart - row index of B_seq at which B's own local rows begin
3446 -    B_seq - the sequential matrix generated
3447 
3448     Level: developer
3449 
3450 @*/
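/*
   Usage sketch (hypothetical; A and B stand for MPIAIJ matrices where A's column layout
   matches B's row layout, as checked below):

      Mat      Bseq;
      IS       rowb,colb;
      PetscInt brstart;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
      ...      the returned rowb and colb must be passed back unchanged for reuse:
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
*/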
3451 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3452 {
3453   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3454   PetscErrorCode    ierr;
3455   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3456   IS                isrowb,iscolb;
3457   Mat               *bseq;
3458 
3459   PetscFunctionBegin;
3460   if (a->cstart != b->rstart || a->cend != b->rend){
3461     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3462   }
3463   if (!logkey_GetBrowsOfAcols) {
3464     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3465   }
3466   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3467 
3468   if (scall == MAT_INITIAL_MATRIX){
3469     start = a->cstart;
3470     cmap  = a->garray;
3471     nzA   = a->A->n;
3472     nzB   = a->B->n;
3473     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3474     ncols = 0;
3475     for (i=0; i<nzB; i++) {  /* rows preceding the local row range */
3476       if (cmap[i] < start) idx[ncols++] = cmap[i];
3477       else break;
3478     }
3479     imark = i;
3480     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3481     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows following the local row range */
3482     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3483     ierr = PetscFree(idx);CHKERRQ(ierr);
3484     *brstart = imark;
3485     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3486   } else {
3487     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3488     isrowb = *rowb; iscolb = *colb;
3489     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3490     bseq[0] = *B_seq;
3491   }
3492   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3493   *B_seq = bseq[0];
3494   ierr = PetscFree(bseq);CHKERRQ(ierr);
3495   if (!rowb){
3496     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3497   } else {
3498     *rowb = isrowb;
3499   }
3500   if (!colb){
3501     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3502   } else {
3503     *colb = iscolb;
3504   }
3505   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3506   PetscFunctionReturn(0);
3507 }
3508 
3509 static PetscEvent logkey_GetBrowsOfAocols = 0;
3510 #undef __FUNCT__
3511 #define __FUNCT__ "MatGetBrowsOfAoCols"
3512 /*@C
3513     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
3514     of the OFF-DIAGONAL portion of the local part of A
3515 
3516     Collective on Mat
3517 
3518    Input Parameters:
3519 +    A,B - the matrices in mpiaij format
3520 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3521 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3522 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3523 
3524    Output Parameter:
3525 .    B_oth - the sequential matrix generated
3526 
3527     Level: developer
3528 
3529 @*/
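/*
   Usage sketch (hypothetical; A and B stand for MPIAIJ matrices with compatible layouts):

      Mat         B_oth;
      PetscInt    *startsj;
      PetscScalar *bufa;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ...         startsj and bufa must be passed back unchanged for reuse:
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
*/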
3530 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3531 {
3532   VecScatter_MPI_General *gen_to,*gen_from;
3533   PetscErrorCode         ierr;
3534   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3535   Mat_SeqAIJ             *b_oth;
3536   VecScatter             ctx=a->Mvctx;
3537   MPI_Comm               comm=ctx->comm;
3538   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3539   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3540   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3541   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3542   MPI_Request            *rwaits,*swaits;
3543   MPI_Status             *sstatus,rstatus;
3544   PetscInt               *cols;
3545   PetscScalar            *vals;
3546   PetscMPIInt            j;
3547 
3548   PetscFunctionBegin;
3549   if (a->cstart != b->rstart || a->cend != b->rend){
3550     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3551   }
3552   if (!logkey_GetBrowsOfAocols) {
3553     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3554   }
3555   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3556   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3557 
3558   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3559   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3560   rvalues  = gen_from->values; /* receive buffer for the incoming row lengths */
3561   svalues  = gen_to->values;   /* send buffer for the outgoing row lengths */
3562   nrecvs   = gen_from->n;
3563   nsends   = gen_to->n;
3564   rwaits   = gen_from->requests;
3565   swaits   = gen_to->requests;
3566   srow     = gen_to->indices;   /* local row index to be sent */
3567   rstarts  = gen_from->starts;
3568   sstarts  = gen_to->starts;
3569   rprocs   = gen_from->procs;
3570   sprocs   = gen_to->procs;
3571   sstatus  = gen_to->sstatus;
3572 
3573   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3574   if (scall == MAT_INITIAL_MATRIX){
3575     /* i-array */
3576     /*---------*/
3577     /*  post receives */
3578     for (i=0; i<nrecvs; i++){
3579       rowlen = (PetscInt*)rvalues + rstarts[i];
3580       nrows = rstarts[i+1]-rstarts[i];
3581       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3582     }
3583 
3584     /* pack the outgoing message */
3585     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3586     rstartsj = sstartsj + nsends +1;
3587     sstartsj[0] = 0;  rstartsj[0] = 0;
3588     len = 0; /* total length of j or a array to be sent */
3589     k = 0;
3590     for (i=0; i<nsends; i++){
3591       rowlen = (PetscInt*)svalues + sstarts[i];
3592       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3593       for (j=0; j<nrows; j++) {
3594         row = srow[k] + b->rowners[rank]; /* global row idx */
3595         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3596         len += rowlen[j];
3597         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3598         k++;
3599       }
3600       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3601       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3602     }
3603     /* recvs and sends of i-array are completed */
3604     i = nrecvs;
3605     while (i--) {
3606       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3607     }
3608     if (nsends) {
3609       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3610     }
3611     /* allocate buffers for sending j and a arrays */
3612     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3613     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3614 
3615     /* create i-array of B_oth */
3616     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3617     b_othi[0] = 0;
3618     len = 0; /* total length of j or a array to be received */
3619     k = 0;
3620     for (i=0; i<nrecvs; i++){
3621       rowlen = (PetscInt*)rvalues + rstarts[i];
3622       nrows = rstarts[i+1]-rstarts[i];
3623       for (j=0; j<nrows; j++) {
3624         b_othi[k+1] = b_othi[k] + rowlen[j];
3625         len += rowlen[j]; k++;
3626       }
3627       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3628     }
3629 
3630     /* allocate space for the j and a arrays of B_oth */
3631     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3632     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3633 
3634     /* j-array */
3635     /*---------*/
3636     /*  post receives of j-array */
3637     for (i=0; i<nrecvs; i++){
3638       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3639       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3640     }
3641     k = 0;
3642     for (i=0; i<nsends; i++){
3643       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3644       bufJ = bufj+sstartsj[i];
3645       for (j=0; j<nrows; j++) {
3646         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3647         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3648         for (l=0; l<ncols; l++){
3649           *bufJ++ = cols[l];
3650         }
3651         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3652       }
3653       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3654     }
3655 
3656     /* recvs and sends of j-array are completed */
3657     i = nrecvs;
3658     while (i--) {
3659       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3660     }
3661     if (nsends) {
3662       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3663     }
3664   } else if (scall == MAT_REUSE_MATRIX){
3665     sstartsj = *startsj;
3666     rstartsj = sstartsj + nsends +1;
3667     bufa     = *bufa_ptr;
3668     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3669     b_otha   = b_oth->a;
3670   } else {
3671     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3672   }
3673 
3674   /* a-array */
3675   /*---------*/
3676   /*  post receives of a-array */
3677   for (i=0; i<nrecvs; i++){
3678     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3679     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3680   }
3681   k = 0;
3682   for (i=0; i<nsends; i++){
3683     nrows = sstarts[i+1]-sstarts[i];
3684     bufA = bufa+sstartsj[i];
3685     for (j=0; j<nrows; j++) {
3686       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3687       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3688       for (l=0; l<ncols; l++){
3689         *bufA++ = vals[l];
3690       }
3691       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3692 
3693     }
3694     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3695   }
3696   /* recvs and sends of a-array are completed */
3697   i = nrecvs;
3698   while (i--) {
3699     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3700   }
3701   if (nsends) {
3702     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3703   }
3704 
3705   if (scall == MAT_INITIAL_MATRIX){
3706     /* put together the new matrix */
3707     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3708 
3709     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3710     /* Since these are PETSc arrays, change flags to free them as necessary. */
3711     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3712     b_oth->freedata = PETSC_TRUE;
3713     b_oth->nonew    = 0;
3714 
3715     ierr = PetscFree(bufj);CHKERRQ(ierr);
3716     if (!startsj || !bufa_ptr){
3717       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3718       ierr = PetscFree(bufa);CHKERRQ(ierr);
3719     } else {
3720       *startsj  = sstartsj;
3721       *bufa_ptr = bufa;
3722     }
3723   }
3724   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3725 
3726   PetscFunctionReturn(0);
3727 }
3728 
3729 /*MC
3730    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3731 
3732    Options Database Keys:
3733 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3734 
3735   Level: beginner
3736 
3737 .seealso: MatCreateMPIAIJ
3738 M*/
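/*
   Usage sketch (hypothetical sizes; the standard way to obtain this type at runtime):

      Mat A;
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   or set -mat_type mpiaij on the command line and call MatSetFromOptions(A) instead of
   MatSetType().
*/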
3739 
3740 EXTERN_C_BEGIN
3741 #undef __FUNCT__
3742 #define __FUNCT__ "MatCreate_MPIAIJ"
3743 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3744 {
3745   Mat_MPIAIJ     *b;
3746   PetscErrorCode ierr;
3747   PetscInt       i;
3748   PetscMPIInt    size;
3749 
3750   PetscFunctionBegin;
3751   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3752 
3753   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3754   B->data         = (void*)b;
3755   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3756   B->factor       = 0;
3757   B->bs           = 1;
3758   B->assembled    = PETSC_FALSE;
3759   B->mapping      = 0;
3760 
3761   B->insertmode      = NOT_SET_VALUES;
3762   b->size            = size;
3763   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3764 
3765   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3766   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3767 
3768   /* the information in the maps duplicates the information computed below, eventually
3769      we should remove the duplicate information that is not contained in the maps */
3770   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3771   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3772 
3773   /* build local table of row and column ownerships */
3774   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3775   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3776   b->cowners = b->rowners + b->size + 2;
3777   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3778   b->rowners[0] = 0;
3779   for (i=2; i<=b->size; i++) {
3780     b->rowners[i] += b->rowners[i-1];
3781   }
3782   b->rstart = b->rowners[b->rank];
3783   b->rend   = b->rowners[b->rank+1];
3784   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3785   b->cowners[0] = 0;
3786   for (i=2; i<=b->size; i++) {
3787     b->cowners[i] += b->cowners[i-1];
3788   }
3789   b->cstart = b->cowners[b->rank];
3790   b->cend   = b->cowners[b->rank+1];
3791 
3792   /* build cache for off array entries formed */
3793   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3794   b->donotstash  = PETSC_FALSE;
3795   b->colmap      = 0;
3796   b->garray      = 0;
3797   b->roworiented = PETSC_TRUE;
3798 
3799   /* stuff used for matrix vector multiply */
3800   b->lvec      = PETSC_NULL;
3801   b->Mvctx     = PETSC_NULL;
3802 
3803   /* stuff for MatGetRow() */
3804   b->rowindices   = 0;
3805   b->rowvalues    = 0;
3806   b->getrowactive = PETSC_FALSE;
3807 
3808   /* Explicitly create 2 MATSEQAIJ matrices. */
3809   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3810   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3811   PetscLogObjectParent(B,b->A);
3812   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3813   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3814   PetscLogObjectParent(B,b->B);
3815 
3816   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3817                                      "MatStoreValues_MPIAIJ",
3818                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3819   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3820                                      "MatRetrieveValues_MPIAIJ",
3821                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3822   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3823 				     "MatGetDiagonalBlock_MPIAIJ",
3824                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3825   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3826 				     "MatIsTranspose_MPIAIJ",
3827 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3828   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3829 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3830 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3831   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3832 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3833 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3834   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3835 				     "MatDiagonalScaleLocal_MPIAIJ",
3836 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3837   PetscFunctionReturn(0);
3838 }
3839 EXTERN_C_END
3840