xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 9ee9b77b2cca71d2a08ebc06fc4da7cd72e3025a)
1 
2 #include "src/mat/impls/aij/mpi/mpiaij.h"
3 #include "src/inline/spops.h"
4 
5 /*
6   Local utility routine that creates a mapping from the global column
7 number to the local number in the off-diagonal part of the local
8 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
9 a slightly higher hash table cost; without it, it is not scalable (each processor
10 has an order-N integer array) but access is fast.
11 */
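/*
  Example derived from the code below: if the off-diagonal block B references the
  global columns garray[] = {3,7,12}, then the array version stores colmap[3] = 1,
  colmap[7] = 2, colmap[12] = 3, i.e. the local column index plus one, so that a
  lookup returning 0 means "this global column does not occur in B on this process".
  The PETSC_USE_CTABLE version stores the same mapping with the key shifted by one as well.
*/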
12 #undef __FUNCT__
13 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
14 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
15 {
16   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
17   PetscErrorCode ierr;
18   PetscInt       n = aij->B->n,i;
19 
20   PetscFunctionBegin;
21 #if defined (PETSC_USE_CTABLE)
22   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
23   for (i=0; i<n; i++){
24     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
25   }
26 #else
27   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
28   PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));
29   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
31 #endif
32   PetscFunctionReturn(0);
33 }
34 
35 #define CHUNKSIZE   15
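/*
  The two macros below insert a single (row,col,value) entry directly into the local
  SeqAIJ storage of the diagonal block (A) or the off-diagonal block (B), avoiding a
  full MatSetValues_SeqAIJ() call per entry.  Each does a short binary search plus a
  linear scan to locate the column, adds to or overwrites an existing value, and
  otherwise shifts the later entries of the row up by one to make room.  If the row
  is already full, the entire (i,j,a) storage is reallocated in one malloc with
  CHUNKSIZE extra slots opened up after that row.
*/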
36 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
37 { \
38  \
39     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
40     rmax = aimax[row]; nrow = ailen[row];  \
41     col1 = col - shift; \
42      \
43     low = 0; high = nrow; \
44     while (high-low > 5) { \
45       t = (low+high)/2; \
46       if (rp[t] > col) high = t; \
47       else             low  = t; \
48     } \
49       for (_i=low; _i<high; _i++) { \
50         if (rp[_i] > col1) break; \
51         if (rp[_i] == col1) { \
52           if (addv == ADD_VALUES) ap[_i] += value;   \
53           else                    ap[_i] = value; \
54           goto a_noinsert; \
55         } \
56       }  \
57       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
58       if (nonew == 1) goto a_noinsert; \
59       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
60       if (nrow >= rmax) { \
61         /* there is no extra room in row, therefore enlarge */ \
62         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
63         PetscScalar *new_a; \
64  \
65         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
66  \
67         /* malloc new storage space */ \
68         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
69         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
70         new_j   = (PetscInt*)(new_a + new_nz); \
71         new_i   = new_j + new_nz; \
72  \
73         /* copy over old data into new slots */ \
74         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
75         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
76         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
77         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
78         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
79                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
80         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
81         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
82                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
83         /* free up old matrix storage */ \
84  \
85         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
86         if (!a->singlemalloc) { \
87            ierr = PetscFree(a->i);CHKERRQ(ierr); \
88            ierr = PetscFree(a->j);CHKERRQ(ierr); \
89         } \
90         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
91         a->singlemalloc = PETSC_TRUE; \
92  \
93         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
94         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
95         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
96         a->maxnz += CHUNKSIZE; \
97         a->reallocs++; \
98       } \
99       N = nrow++ - 1; a->nz++; \
100       /* shift up all the later entries in this row */ \
101       for (ii=N; ii>=_i; ii--) { \
102         rp[ii+1] = rp[ii]; \
103         ap[ii+1] = ap[ii]; \
104       } \
105       rp[_i] = col1;  \
106       ap[_i] = value;  \
107       a_noinsert: ; \
108       ailen[row] = nrow; \
109 }
110 
111 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
112 { \
113  \
114     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
115     rmax = bimax[row]; nrow = bilen[row];  \
116     col1 = col - shift; \
117      \
118     low = 0; high = nrow; \
119     while (high-low > 5) { \
120       t = (low+high)/2; \
121       if (rp[t] > col) high = t; \
122       else             low  = t; \
123     } \
124        for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > col1) break; \
126         if (rp[_i] == col1) { \
127           if (addv == ADD_VALUES) ap[_i] += value;   \
128           else                    ap[_i] = value; \
129           goto b_noinsert; \
130         } \
131       }  \
132       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
133       if (nonew == 1) goto b_noinsert; \
134       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
135       if (nrow >= rmax) { \
136         /* there is no extra room in row, therefore enlarge */ \
137         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
138         PetscScalar *new_a; \
139  \
140         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
141  \
142         /* malloc new storage space */ \
143         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
144         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
145         new_j   = (PetscInt*)(new_a + new_nz); \
146         new_i   = new_j + new_nz; \
147  \
148         /* copy over old data into new slots */ \
149         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
150         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
151         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(PetscInt));CHKERRQ(ierr); \
152         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
153         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
154                                                            len*sizeof(PetscInt));CHKERRQ(ierr); \
155         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
156         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
157                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
158         /* free up old matrix storage */ \
159  \
160         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
161         if (!b->singlemalloc) { \
162           ierr = PetscFree(b->i);CHKERRQ(ierr); \
163           ierr = PetscFree(b->j);CHKERRQ(ierr); \
164         } \
165         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
166         b->singlemalloc = PETSC_TRUE; \
167  \
168         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
169         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
170         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar))); \
171         b->maxnz += CHUNKSIZE; \
172         b->reallocs++; \
173       } \
174       N = nrow++ - 1; b->nz++; \
175       /* shift up all the later entries in this row */ \
176       for (ii=N; ii>=_i; ii--) { \
177         rp[ii+1] = rp[ii]; \
178         ap[ii+1] = ap[ii]; \
179       } \
180       rp[_i] = col1;  \
181       ap[_i] = value;  \
182       b_noinsert: ; \
183       bilen[row] = nrow; \
184 }
185 
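/*
  MatSetValues_MPIAIJ() routes each (im[i],in[j]) entry as follows:
    - row owned locally and column in [cstart,cend): diagonal block aij->A;
    - row owned locally, column elsewhere: off-diagonal block aij->B (after the first
      assembly the global column is translated to a local index through colmap);
    - row owned by another process: the value is stashed and communicated during
      MatAssemblyBegin/End(), unless MAT_IGNORE_OFF_PROC_ENTRIES was set.

  A minimal usage sketch from user code (the matrix and index names are illustrative only):

    ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/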
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIAIJ"
188 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
191   PetscScalar    value;
192   PetscErrorCode ierr;
193   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
194   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
195   PetscTruth     roworiented = aij->roworiented;
196 
197   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
198   Mat            A = aij->A;
199   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
200   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
201   PetscScalar    *aa = a->a;
202   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
203   Mat            B = aij->B;
204   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
205   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
206   PetscScalar    *ba = b->a;
207 
208   PetscInt       *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
209   PetscInt       nonew = a->nonew,shift=0;
210   PetscScalar    *ap;
211 
212   PetscFunctionBegin;
213   for (i=0; i<m; i++) {
214     if (im[i] < 0) continue;
215 #if defined(PETSC_USE_DEBUG)
216     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
217 #endif
218     if (im[i] >= rstart && im[i] < rend) {
219       row = im[i] - rstart;
220       for (j=0; j<n; j++) {
221         if (in[j] >= cstart && in[j] < cend){
222           col = in[j] - cstart;
223           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
224           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
225           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
226           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
227         } else if (in[j] < 0) continue;
228 #if defined(PETSC_USE_DEBUG)
229         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
230 #endif
231         else {
232           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
233           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
234           if (mat->was_assembled) {
235             if (!aij->colmap) {
236               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined (PETSC_USE_CTABLE)
239             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
240             col--;
241 #else
242             col = aij->colmap[in[j]] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
245               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
246               col =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
248               B = aij->B;
249               b = (Mat_SeqAIJ*)B->data;
250               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
251               ba = b->a;
252             }
253           } else col = in[j];
254           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (!aij->donotstash) {
260         if (roworiented) {
261           if (ignorezeroentries && v[i*n] == 0.0) continue;
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
263         } else {
264           if (ignorezeroentries && v[i] == 0.0) continue;
265           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
266         }
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatGetValues_MPIAIJ"
275 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
276 {
277   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
278   PetscErrorCode ierr;
279   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
280   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
281 
282   PetscFunctionBegin;
283   for (i=0; i<m; i++) {
284     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
285     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
286     if (idxm[i] >= rstart && idxm[i] < rend) {
287       row = idxm[i] - rstart;
288       for (j=0; j<n; j++) {
289         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
290         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
291         if (idxn[j] >= cstart && idxn[j] < cend){
292           col = idxn[j] - cstart;
293           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
294         } else {
295           if (!aij->colmap) {
296             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
297           }
298 #if defined (PETSC_USE_CTABLE)
299           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
300           col--;
301 #else
302           col = aij->colmap[idxn[j]] - 1;
303 #endif
304           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
305           else {
306             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
307           }
308         }
309       }
310     } else {
311       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
312     }
313   }
314   PetscFunctionReturn(0);
315 }
316 
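/*
  Assembly: MatAssemblyBegin_MPIAIJ() checks (via an allreduce) that all processes use
  the same InsertMode and starts scattering the stashed off-process values to their
  owners.  MatAssemblyEnd_MPIAIJ() receives those values, inserts them with
  MatSetValues_MPIAIJ(), assembles the two sequential blocks, and, if any process had
  to disassemble its B block (detected with an MPI_PROD reduction over was_assembled),
  makes every process disassemble so the multiply scatter can be rebuilt consistently.
*/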
317 #undef __FUNCT__
318 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
319 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
320 {
321   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
322   PetscErrorCode ierr;
323   PetscInt       nstash,reallocs;
324   InsertMode     addv;
325 
326   PetscFunctionBegin;
327   if (aij->donotstash) {
328     PetscFunctionReturn(0);
329   }
330 
331   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
332   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
333   if (addv == (ADD_VALUES|INSERT_VALUES)) {
334     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
335   }
336   mat->insertmode = addv; /* in case this processor had no cache */
337 
338   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
339   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
340   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNCT__
345 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
346 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
347 {
348   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
349   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
350   PetscErrorCode ierr;
351   PetscMPIInt    n;
352   PetscInt       i,j,rstart,ncols,flg;
353   PetscInt       *row,*col,other_disassembled;
354   PetscScalar    *val;
355   InsertMode     addv = mat->insertmode;
356 
357   PetscFunctionBegin;
358   if (!aij->donotstash) {
359     while (1) {
360       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
361       if (!flg) break;
362 
363       for (i=0; i<n;) {
364         /* Now identify the consecutive vals belonging to the same row */
365         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
366         if (j < n) ncols = j-i;
367         else       ncols = n-i;
368         /* Now assemble all these values with a single function call */
369         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
370         i = j;
371       }
372     }
373     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
374   }
375   a->compressedrow.use     = PETSC_FALSE;
376   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
377   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
378 
379   /* determine if any processor has disassembled; if so we must
380      also disassemble ourselves, in order that we may reassemble. */
381   /*
382      if the nonzero structure of submatrix B cannot change then we know that
383      no processor disassembled, thus we can skip this stuff
384   */
385   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
386     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
387     if (mat->was_assembled && !other_disassembled) {
388       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
389       /* reaccess the b because aij->B was changed */
390       b    = (Mat_SeqAIJ *)aij->B->data;
391     }
392   }
393 
394   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
395     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
396   }
397 
398   b->inode.use         = PETSC_FALSE;
399   b->compressedrow.use = PETSC_TRUE;
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 
408   /* used by MatAXPY() */
409   a->xtoy = 0; b->xtoy = 0;
410   a->XtoY = 0; b->XtoY = 0;
411 
412   PetscFunctionReturn(0);
413 }
414 
415 #undef __FUNCT__
416 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
417 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
418 {
419   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
420   PetscErrorCode ierr;
421 
422   PetscFunctionBegin;
423   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
424   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
425   PetscFunctionReturn(0);
426 }
427 
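/*
  MatZeroRows_MPIAIJ(): the rows in 'is' are in global numbering and may be owned by
  other processes.  Each process determines the owner of every requested row from
  l->rowners, ships the row numbers to the owners (MPI_Isend/MPI_Irecv, with message
  counts and lengths negotiated by PetscMaxSum()), and then each owner zeroes its own
  rows, optionally placing *diag on the diagonal.
*/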
428 #undef __FUNCT__
429 #define __FUNCT__ "MatZeroRows_MPIAIJ"
430 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
431 {
432   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
433   PetscErrorCode ierr;
434   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag;
435   PetscInt       i,N,*rows,*owners = l->rowners;
436   PetscInt       *nprocs,j,idx,nsends,row;
437   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
438   PetscInt       *rvalues,count,base,slen,*source;
439   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
440   MPI_Comm       comm = A->comm;
441   MPI_Request    *send_waits,*recv_waits;
442   MPI_Status     recv_status,*send_status;
443   IS             istmp;
444   PetscTruth     found;
445 
446   PetscFunctionBegin;
447   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
448   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
449 
450   /*  first count number of contributors to each processor */
451   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
452   ierr   = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
453   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
454   for (i=0; i<N; i++) {
455     idx = rows[i];
456     found = PETSC_FALSE;
457     for (j=0; j<size; j++) {
458       if (idx >= owners[j] && idx < owners[j+1]) {
459         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
460       }
461     }
462     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
463   }
464   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
465 
466   /* inform other processors of number of messages and max length*/
467   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
468 
469   /* post receives:   */
470   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
471   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
472   for (i=0; i<nrecvs; i++) {
473     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
474   }
475 
476   /* do sends:
477       1) starts[i] gives the starting index in svalues for stuff going to
478          the ith processor
479   */
480   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
481   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
482   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
483   starts[0] = 0;
484   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
485   for (i=0; i<N; i++) {
486     svalues[starts[owner[i]]++] = rows[i];
487   }
488   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
489 
490   starts[0] = 0;
491   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
492   count = 0;
493   for (i=0; i<size; i++) {
494     if (nprocs[2*i+1]) {
495       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
496     }
497   }
498   ierr = PetscFree(starts);CHKERRQ(ierr);
499 
500   base = owners[rank];
501 
502   /*  wait on receives */
503   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
504   source = lens + nrecvs;
505   count  = nrecvs; slen = 0;
506   while (count) {
507     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
508     /* unpack receives into our local space */
509     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
510     source[imdex]  = recv_status.MPI_SOURCE;
511     lens[imdex]    = n;
512     slen          += n;
513     count--;
514   }
515   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
516 
517   /* unpack the received global row numbers into local row numbers */
518   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
519   count = 0;
520   for (i=0; i<nrecvs; i++) {
521     values = rvalues + i*nmax;
522     for (j=0; j<lens[i]; j++) {
523       lrows[count++] = values[j] - base;
524     }
525   }
526   ierr = PetscFree(rvalues);CHKERRQ(ierr);
527   ierr = PetscFree(lens);CHKERRQ(ierr);
528   ierr = PetscFree(owner);CHKERRQ(ierr);
529   ierr = PetscFree(nprocs);CHKERRQ(ierr);
530 
531   /* actually zap the local rows */
532   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
533   PetscLogObjectParent(A,istmp);
534 
535   /*
536         Zero the required rows. If the "diagonal block" of the matrix
537      is square and the user wishes to set the diagonal we use separate
538      code so that MatSetValues() is not called for each diagonal entry, which
539      would allocate new memory, causing many mallocs and slowing things down.
540 
541        Contributed by: Mathew Knepley
542   */
543   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
544   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
545   if (diag && (l->A->M == l->A->N)) {
546     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
547   } else if (diag) {
548     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
549     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
550       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
551 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
552     }
553     for (i = 0; i < slen; i++) {
554       row  = lrows[i] + rstart;
555       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
556     }
557     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
558     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
559   } else {
560     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
561   }
562   ierr = ISDestroy(istmp);CHKERRQ(ierr);
563   ierr = PetscFree(lrows);CHKERRQ(ierr);
564 
565   /* wait on sends */
566   if (nsends) {
567     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
568     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
569     ierr = PetscFree(send_status);CHKERRQ(ierr);
570   }
571   ierr = PetscFree(send_waits);CHKERRQ(ierr);
572   ierr = PetscFree(svalues);CHKERRQ(ierr);
573 
574   PetscFunctionReturn(0);
575 }
576 
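/*
  Matrix-vector product with the usual splitting y = A*x_local + B*x_ghost; the scatter
  of the needed ghost values of x into a->lvec is overlapped with the diagonal-block
  multiply:

    VecScatterBegin(...);    start gathering ghost values of x
    y = A*x                  local (diagonal block) product
    VecScatterEnd(...);      ghost values now available in a->lvec
    y += B*lvec              off-diagonal contribution
*/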
577 #undef __FUNCT__
578 #define __FUNCT__ "MatMult_MPIAIJ"
579 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
580 {
581   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
582   PetscErrorCode ierr;
583   PetscInt       nt;
584 
585   PetscFunctionBegin;
586   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
587   if (nt != A->n) {
588     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
589   }
590   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
591   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
592   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
593   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatMultAdd_MPIAIJ"
599 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
600 {
601   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
602   PetscErrorCode ierr;
603 
604   PetscFunctionBegin;
605   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
606   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
607   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
608   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
609   PetscFunctionReturn(0);
610 }
611 
612 #undef __FUNCT__
613 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
614 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
615 {
616   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
617   PetscErrorCode ierr;
618 
619   PetscFunctionBegin;
620   /* do nondiagonal part */
621   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
622   /* send it on its way */
623   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   /* do local part */
625   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
626   /* receive remote parts: note this assumes the values are not actually */
627   /* inserted in yy until the next line, which is true for my implementation */
628   /* but may not always be true. */
629   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
630   PetscFunctionReturn(0);
631 }
632 
633 EXTERN_C_BEGIN
634 #undef __FUNCT__
635 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
636 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
637 {
638   MPI_Comm       comm;
639   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
640   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
641   IS             Me,Notme;
642   PetscErrorCode ierr;
643   PetscInt       M,N,first,last,*notme,i;
644   PetscMPIInt    size;
645 
646   PetscFunctionBegin;
647 
648   /* Easy test: symmetric diagonal block */
649   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
650   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
651   if (!*f) PetscFunctionReturn(0);
652   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
653   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
654   if (size == 1) PetscFunctionReturn(0);
655 
656   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
657   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
658   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
659   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
660   for (i=0; i<first; i++) notme[i] = i;
661   for (i=last; i<M; i++) notme[i-last+first] = i;
662   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
663   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
664   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
665   Aoff = Aoffs[0];
666   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
667   Boff = Boffs[0];
668   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
669   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
670   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
671   ierr = ISDestroy(Me);CHKERRQ(ierr);
672   ierr = ISDestroy(Notme);CHKERRQ(ierr);
673 
674   PetscFunctionReturn(0);
675 }
676 EXTERN_C_END
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
680 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
681 {
682   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
683   PetscErrorCode ierr;
684 
685   PetscFunctionBegin;
686   /* do nondiagonal part */
687   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
688   /* send it on its way */
689   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
690   /* do local part */
691   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
692   /* receive remote parts: note this assumes the values are not actually */
693   /* inserted in zz until the next line, which is true for my implementation */
694   /* but may not always be true. */
695   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
696   PetscFunctionReturn(0);
697 }
698 
699 /*
700   This only works correctly for square matrices where the subblock A->A is the
701    diagonal block
702 */
703 #undef __FUNCT__
704 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
705 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
706 {
707   PetscErrorCode ierr;
708   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
709 
710   PetscFunctionBegin;
711   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
712   if (a->rstart != a->cstart || a->rend != a->cend) {
713     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
714   }
715   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 
719 #undef __FUNCT__
720 #define __FUNCT__ "MatScale_MPIAIJ"
721 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
722 {
723   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
724   PetscErrorCode ierr;
725 
726   PetscFunctionBegin;
727   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
728   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatDestroy_MPIAIJ"
734 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
735 {
736   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
737   PetscErrorCode ierr;
738 
739   PetscFunctionBegin;
740 #if defined(PETSC_USE_LOG)
741   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
742 #endif
743   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
744   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
745   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
746   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
747 #if defined (PETSC_USE_CTABLE)
748   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
749 #else
750   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
751 #endif
752   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
753   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
754   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
755   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
756   ierr = PetscFree(aij);CHKERRQ(ierr);
757 
758   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
759   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
760   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
761   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
762   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
763   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
764   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
765   PetscFunctionReturn(0);
766 }
767 
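/*
  Binary viewer layout written below: a 4-entry header (MAT_FILE_COOKIE, M, N, global
  nonzero count), then the row lengths of every row, then all column indices in global
  numbering (each row's diagonal-block entries spliced into the sorted off-diagonal
  ones), then the numerical values in the same order.  Process 0 writes its own data
  first and then receives and writes each other process's data in turn.
*/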
768 #undef __FUNCT__
769 #define __FUNCT__ "MatView_MPIAIJ_Binary"
770 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
771 {
772   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
773   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
774   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
775   PetscErrorCode    ierr;
776   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
777   int               fd;
778   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
779   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
780   PetscScalar       *column_values;
781 
782   PetscFunctionBegin;
783   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
784   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
785   nz   = A->nz + B->nz;
786   if (!rank) {
787     header[0] = MAT_FILE_COOKIE;
788     header[1] = mat->M;
789     header[2] = mat->N;
790     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
791     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
792     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
793     /* get largest number of rows any processor has */
794     rlen = mat->m;
795     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
796     for (i=1; i<size; i++) {
797       rlen = PetscMax(rlen,range[i+1] - range[i]);
798     }
799   } else {
800     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
801     rlen = mat->m;
802   }
803 
804   /* load up the local row counts */
805   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
806   for (i=0; i<mat->m; i++) {
807     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
808   }
809 
810   /* store the row lengths to the file */
811   if (!rank) {
812     MPI_Status status;
813     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
814     for (i=1; i<size; i++) {
815       rlen = range[i+1] - range[i];
816       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
817       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
818     }
819   } else {
820     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
821   }
822   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
823 
824   /* load up the local column indices */
825   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
826   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
827   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
828   cnt  = 0;
829   for (i=0; i<mat->m; i++) {
830     for (j=B->i[i]; j<B->i[i+1]; j++) {
831       if ( (col = garray[B->j[j]]) > cstart) break;
832       column_indices[cnt++] = col;
833     }
834     for (k=A->i[i]; k<A->i[i+1]; k++) {
835       column_indices[cnt++] = A->j[k] + cstart;
836     }
837     for (; j<B->i[i+1]; j++) {
838       column_indices[cnt++] = garray[B->j[j]];
839     }
840   }
841   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
842 
843   /* store the column indices to the file */
844   if (!rank) {
845     MPI_Status status;
846     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
847     for (i=1; i<size; i++) {
848       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
849       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
850       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
851       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
852     }
853   } else {
854     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
855     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
856   }
857   ierr = PetscFree(column_indices);CHKERRQ(ierr);
858 
859   /* load up the local column values */
860   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
861   cnt  = 0;
862   for (i=0; i<mat->m; i++) {
863     for (j=B->i[i]; j<B->i[i+1]; j++) {
864       if ( garray[B->j[j]] > cstart) break;
865       column_values[cnt++] = B->a[j];
866     }
867     for (k=A->i[i]; k<A->i[i+1]; k++) {
868       column_values[cnt++] = A->a[k];
869     }
870     for (; j<B->i[i+1]; j++) {
871       column_values[cnt++] = B->a[j];
872     }
873   }
874   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
875 
876   /* store the column values to the file */
877   if (!rank) {
878     MPI_Status status;
879     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
880     for (i=1; i<size; i++) {
881       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
882       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
883       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
884       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
885     }
886   } else {
887     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
888     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
889   }
890   ierr = PetscFree(column_values);CHKERRQ(ierr);
891   PetscFunctionReturn(0);
892 }
893 
894 #undef __FUNCT__
895 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
896 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
897 {
898   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
899   PetscErrorCode    ierr;
900   PetscMPIInt       rank = aij->rank,size = aij->size;
901   PetscTruth        isdraw,iascii,isbinary;
902   PetscViewer       sviewer;
903   PetscViewerFormat format;
904 
905   PetscFunctionBegin;
906   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
907   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
908   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
909   if (iascii) {
910     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
911     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
912       MatInfo    info;
913       Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
914       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
915       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
916       if (!a->inode.size) {
917         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
918 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
919       } else {
920         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
921 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
922       }
923       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
924       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
925       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
926       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
927       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
928       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
929       PetscFunctionReturn(0);
930     } else if (format == PETSC_VIEWER_ASCII_INFO) {
931       Mat_SeqAIJ *a = (Mat_SeqAIJ*) aij->A->data;
932       if (a->inode.size) {
933         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",a->inode.node_count,a->inode.limit);CHKERRQ(ierr);
934       } else {
935         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
936       }
937       PetscFunctionReturn(0);
938     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
939       PetscFunctionReturn(0);
940     }
941   } else if (isbinary) {
942     if (size == 1) {
943       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
944       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
945     } else {
946       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
947     }
948     PetscFunctionReturn(0);
949   } else if (isdraw) {
950     PetscDraw  draw;
951     PetscTruth isnull;
952     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
953     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
954   }
955 
956   if (size == 1) {
957     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
958     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
959   } else {
960     /* assemble the entire matrix onto first processor. */
961     Mat         A;
962     Mat_SeqAIJ  *Aloc;
963     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
964     PetscScalar *a;
965 
966     if (!rank) {
967       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
968     } else {
969       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
970     }
971     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
972     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
973     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
974     PetscLogObjectParent(mat,A);
975 
976     /* copy over the A part */
977     Aloc = (Mat_SeqAIJ*)aij->A->data;
978     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
979     row = aij->rstart;
980     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
981     for (i=0; i<m; i++) {
982       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
983       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
984     }
985     aj = Aloc->j;
986     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
987 
988     /* copy over the B part */
989     Aloc = (Mat_SeqAIJ*)aij->B->data;
990     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
991     row  = aij->rstart;
992     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
993     ct   = cols;
994     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
995     for (i=0; i<m; i++) {
996       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
997       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
998     }
999     ierr = PetscFree(ct);CHKERRQ(ierr);
1000     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1001     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1002     /*
1003        Everyone has to participate in the viewing call that draws the matrix since the graphics waits are
1004        synchronized across all processors that share the PetscDraw object
1005     */
1006     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1007     if (!rank) {
1008       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1009       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1010     }
1011     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1012     ierr = MatDestroy(A);CHKERRQ(ierr);
1013   }
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatView_MPIAIJ"
1019 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1020 {
1021   PetscErrorCode ierr;
1022   PetscTruth     iascii,isdraw,issocket,isbinary;
1023 
1024   PetscFunctionBegin;
1025   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1026   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1027   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1028   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1029   if (iascii || isdraw || isbinary || issocket) {
1030     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1031   } else {
1032     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1033   }
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 
1038 
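/*
  Local SOR/relaxation: only the SOR_LOCAL_* variants are supported.  Each outer
  iteration gathers the current ghost values of xx into mat->lvec, moves the
  off-process coupling to the right-hand side as bb1 = bb - B*lvec (implemented as a
  multadd after scaling lvec by -1), and applies the requested sequential sweep of the
  diagonal block to bb1.
*/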
1039 #undef __FUNCT__
1040 #define __FUNCT__ "MatRelax_MPIAIJ"
1041 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1042 {
1043   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1044   PetscErrorCode ierr;
1045   Vec            bb1;
1046   PetscScalar    mone=-1.0;
1047 
1048   PetscFunctionBegin;
1049   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1050 
1051   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1052 
1053   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1054     if (flag & SOR_ZERO_INITIAL_GUESS) {
1055       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1056       its--;
1057     }
1058 
1059     while (its--) {
1060       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1061       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1062 
1063       /* update rhs: bb1 = bb - B*x */
1064       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1065       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1066 
1067       /* local sweep */
1068       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1069       CHKERRQ(ierr);
1070     }
1071   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1072     if (flag & SOR_ZERO_INITIAL_GUESS) {
1073       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1074       its--;
1075     }
1076     while (its--) {
1077       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1078       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1079 
1080       /* update rhs: bb1 = bb - B*x */
1081       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1082       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1083 
1084       /* local sweep */
1085       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1086       CHKERRQ(ierr);
1087     }
1088   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1089     if (flag & SOR_ZERO_INITIAL_GUESS) {
1090       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1091       its--;
1092     }
1093     while (its--) {
1094       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1095       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1096 
1097       /* update rhs: bb1 = bb - B*x */
1098       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1099       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1100 
1101       /* local sweep */
1102       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1103       CHKERRQ(ierr);
1104     }
1105   } else {
1106     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1107   }
1108 
1109   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1115 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1116 {
1117   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1118   Mat            A = mat->A,B = mat->B;
1119   PetscErrorCode ierr;
1120   PetscReal      isend[5],irecv[5];
1121 
1122   PetscFunctionBegin;
1123   info->block_size     = 1.0;
1124   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1125   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1126   isend[3] = info->memory;  isend[4] = info->mallocs;
1127   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1128   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1129   isend[3] += info->memory;  isend[4] += info->mallocs;
1130   if (flag == MAT_LOCAL) {
1131     info->nz_used      = isend[0];
1132     info->nz_allocated = isend[1];
1133     info->nz_unneeded  = isend[2];
1134     info->memory       = isend[3];
1135     info->mallocs      = isend[4];
1136   } else if (flag == MAT_GLOBAL_MAX) {
1137     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1138     info->nz_used      = irecv[0];
1139     info->nz_allocated = irecv[1];
1140     info->nz_unneeded  = irecv[2];
1141     info->memory       = irecv[3];
1142     info->mallocs      = irecv[4];
1143   } else if (flag == MAT_GLOBAL_SUM) {
1144     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1145     info->nz_used      = irecv[0];
1146     info->nz_allocated = irecv[1];
1147     info->nz_unneeded  = irecv[2];
1148     info->memory       = irecv[3];
1149     info->mallocs      = irecv[4];
1150   }
1151   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1152   info->fill_ratio_needed = 0;
1153   info->factor_mallocs    = 0;
1154   info->rows_global       = (double)matin->M;
1155   info->columns_global    = (double)matin->N;
1156   info->rows_local        = (double)matin->m;
1157   info->columns_local     = (double)matin->N;
1158 
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatSetOption_MPIAIJ"
1164 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1165 {
1166   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1167   PetscErrorCode ierr;
1168 
1169   PetscFunctionBegin;
1170   switch (op) {
1171   case MAT_NO_NEW_NONZERO_LOCATIONS:
1172   case MAT_YES_NEW_NONZERO_LOCATIONS:
1173   case MAT_COLUMNS_UNSORTED:
1174   case MAT_COLUMNS_SORTED:
1175   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1176   case MAT_KEEP_ZEROED_ROWS:
1177   case MAT_NEW_NONZERO_LOCATION_ERR:
1178   case MAT_USE_INODES:
1179   case MAT_DO_NOT_USE_INODES:
1180   case MAT_IGNORE_ZERO_ENTRIES:
1181     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1182     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1183     break;
1184   case MAT_ROW_ORIENTED:
1185     a->roworiented = PETSC_TRUE;
1186     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1187     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1188     break;
1189   case MAT_ROWS_SORTED:
1190   case MAT_ROWS_UNSORTED:
1191   case MAT_YES_NEW_DIAGONALS:
1192     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1193     break;
1194   case MAT_COLUMN_ORIENTED:
1195     a->roworiented = PETSC_FALSE;
1196     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1197     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1198     break;
1199   case MAT_IGNORE_OFF_PROC_ENTRIES:
1200     a->donotstash = PETSC_TRUE;
1201     break;
1202   case MAT_NO_NEW_DIAGONALS:
1203     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1204   case MAT_SYMMETRIC:
1205   case MAT_STRUCTURALLY_SYMMETRIC:
1206   case MAT_HERMITIAN:
1207   case MAT_SYMMETRY_ETERNAL:
1208     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1209     break;
1210   case MAT_NOT_SYMMETRIC:
1211   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1212   case MAT_NOT_HERMITIAN:
1213   case MAT_NOT_SYMMETRY_ETERNAL:
1214     break;
1215   default:
1216     SETERRQ(PETSC_ERR_SUP,"unknown option");
1217   }
1218   PetscFunctionReturn(0);
1219 }
1220 
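/*
  MatGetRow_MPIAIJ() returns a local row with global column numbers: the row of the
  off-diagonal block B is translated through mat->garray and merged with the row of
  the diagonal block A (shifted by cstart) so that the combined column indices come
  out in increasing order.  The work arrays rowvalues/rowindices are allocated once,
  sized for the longest local row.
*/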
1221 #undef __FUNCT__
1222 #define __FUNCT__ "MatGetRow_MPIAIJ"
1223 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1224 {
1225   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1226   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1227   PetscErrorCode ierr;
1228   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1229   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1230   PetscInt       *cmap,*idx_p;
1231 
1232   PetscFunctionBegin;
1233   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1234   mat->getrowactive = PETSC_TRUE;
1235 
1236   if (!mat->rowvalues && (idx || v)) {
1237     /*
1238         allocate enough space to hold information from the longest row.
1239     */
1240     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1241     PetscInt     max = 1,tmp;
1242     for (i=0; i<matin->m; i++) {
1243       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1244       if (max < tmp) { max = tmp; }
1245     }
1246     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1247     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1248   }
1249 
1250   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1251   lrow = row - rstart;
1252 
1253   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1254   if (!v)   {pvA = 0; pvB = 0;}
1255   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1256   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1257   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1258   nztot = nzA + nzB;
1259 
1260   cmap  = mat->garray;
1261   if (v  || idx) {
1262     if (nztot) {
1263       /* Sort by increasing column numbers, assuming A and B already sorted */
1264       PetscInt imark = -1;
1265       if (v) {
1266         *v = v_p = mat->rowvalues;
1267         for (i=0; i<nzB; i++) {
1268           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1269           else break;
1270         }
1271         imark = i;
1272         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1273         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1274       }
1275       if (idx) {
1276         *idx = idx_p = mat->rowindices;
1277         if (imark > -1) {
1278           for (i=0; i<imark; i++) {
1279             idx_p[i] = cmap[cworkB[i]];
1280           }
1281         } else {
1282           for (i=0; i<nzB; i++) {
1283             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1284             else break;
1285           }
1286           imark = i;
1287         }
1288         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1289         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1290       }
1291     } else {
1292       if (idx) *idx = 0;
1293       if (v)   *v   = 0;
1294     }
1295   }
1296   *nz = nztot;
1297   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1298   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1304 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1305 {
1306   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1307 
1308   PetscFunctionBegin;
1309   if (!aij->getrowactive) {
1310     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1311   }
1312   aij->getrowactive = PETSC_FALSE;
1313   PetscFunctionReturn(0);
1314 }
1315 
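/*
  Norms: the Frobenius norm sums the squares of the local A and B entries followed by
  an MPI_SUM reduction; the 1-norm accumulates per-column absolute sums into a
  length-N work array on every process (hence not memory scalable) followed by an
  MPI_SUM reduction and a max over columns; the infinity norm takes the maximum local
  row sum followed by an MPI_MAX reduction.  The 2-norm is not supported.
*/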
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatNorm_MPIAIJ"
1318 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1319 {
1320   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1321   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1322   PetscErrorCode ierr;
1323   PetscInt       i,j,cstart = aij->cstart;
1324   PetscReal      sum = 0.0;
1325   PetscScalar    *v;
1326 
1327   PetscFunctionBegin;
1328   if (aij->size == 1) {
1329     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1330   } else {
1331     if (type == NORM_FROBENIUS) {
1332       v = amat->a;
1333       for (i=0; i<amat->nz; i++) {
1334 #if defined(PETSC_USE_COMPLEX)
1335         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1336 #else
1337         sum += (*v)*(*v); v++;
1338 #endif
1339       }
1340       v = bmat->a;
1341       for (i=0; i<bmat->nz; i++) {
1342 #if defined(PETSC_USE_COMPLEX)
1343         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1344 #else
1345         sum += (*v)*(*v); v++;
1346 #endif
1347       }
1348       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1349       *norm = sqrt(*norm);
1350     } else if (type == NORM_1) { /* max column norm */
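      /* Each process accumulates |a_ij| into a length-N array of global column sums
         (diagonal-block columns offset by cstart, off-diagonal columns mapped through garray),
         the partial sums are added across processes with MPI_Allreduce, and the largest
         column sum is the 1-norm.  Note this uses O(N) workspace per process. */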
1351       PetscReal *tmp,*tmp2;
1352       PetscInt    *jj,*garray = aij->garray;
1353       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1354       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1355       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1356       *norm = 0.0;
1357       v = amat->a; jj = amat->j;
1358       for (j=0; j<amat->nz; j++) {
1359         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1360       }
1361       v = bmat->a; jj = bmat->j;
1362       for (j=0; j<bmat->nz; j++) {
1363         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1364       }
1365       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1366       for (j=0; j<mat->N; j++) {
1367         if (tmp2[j] > *norm) *norm = tmp2[j];
1368       }
1369       ierr = PetscFree(tmp);CHKERRQ(ierr);
1370       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1371     } else if (type == NORM_INFINITY) { /* max row norm */
1372       PetscReal ntemp = 0.0;
1373       for (j=0; j<aij->A->m; j++) {
1374         v = amat->a + amat->i[j];
1375         sum = 0.0;
1376         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1377           sum += PetscAbsScalar(*v); v++;
1378         }
1379         v = bmat->a + bmat->i[j];
1380         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1381           sum += PetscAbsScalar(*v); v++;
1382         }
1383         if (sum > ntemp) ntemp = sum;
1384       }
1385       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1386     } else {
1387       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1388     }
1389   }
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatTranspose_MPIAIJ"
1395 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1396 {
1397   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1398   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1399   PetscErrorCode ierr;
1400   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1401   Mat            B;
1402   PetscScalar    *array;
1403 
1404   PetscFunctionBegin;
1405   if (!matout && M != N) {
1406     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1407   }
1408 
1409   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1410   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1411   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1412 
1413   /* copy over the A part */
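  /* Each local row i of the diagonal block becomes column a->rstart+i of B: its column
     indices (temporarily shifted to global numbering here, and shifted back below) become
     the row indices of the inserted single-column block. */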
1414   Aloc = (Mat_SeqAIJ*)a->A->data;
1415   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1416   row = a->rstart;
1417   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1418   for (i=0; i<m; i++) {
1419     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1420     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1421   }
1422   aj = Aloc->j;
1423   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1424 
1425   /* copy over the B part */
1426   Aloc = (Mat_SeqAIJ*)a->B->data;
1427   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1428   row  = a->rstart;
1429   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1430   ct   = cols;
1431   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1432   for (i=0; i<m; i++) {
1433     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1434     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1435   }
1436   ierr = PetscFree(ct);CHKERRQ(ierr);
1437   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1438   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1439   if (matout) {
1440     *matout = B;
1441   } else {
1442     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1443   }
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1449 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1450 {
1451   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1452   Mat            a = aij->A,b = aij->B;
1453   PetscErrorCode ierr;
1454   PetscInt       s1,s2,s3;
1455 
1456   PetscFunctionBegin;
1457   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1458   if (rr) {
1459     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1460     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1461     /* Overlap communication with computation. */
1462     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1463   }
1464   if (ll) {
1465     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1466     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1467     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1468   }
1469   /* scale  the diagonal block */
1470   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1471 
1472   if (rr) {
1473     /* Do a scatter end and then right scale the off-diagonal block */
1474     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1475     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1476   }
1477 
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1484 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1485 {
1486   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1487   PetscErrorCode ierr;
1488 
1489   PetscFunctionBegin;
1490   if (!a->rank) {
1491     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1492   }
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1498 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1499 {
1500   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1501   PetscErrorCode ierr;
1502 
1503   PetscFunctionBegin;
1504   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1505   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1506   PetscFunctionReturn(0);
1507 }
1508 #undef __FUNCT__
1509 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1510 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1511 {
1512   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1513   PetscErrorCode ierr;
1514 
1515   PetscFunctionBegin;
1516   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatEqual_MPIAIJ"
1522 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1523 {
1524   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1525   Mat            a,b,c,d;
1526   PetscTruth     flg;
1527   PetscErrorCode ierr;
1528 
1529   PetscFunctionBegin;
1530   a = matA->A; b = matA->B;
1531   c = matB->A; d = matB->B;
1532 
1533   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1534   if (flg) {
1535     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1536   }
1537   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1538   PetscFunctionReturn(0);
1539 }
1540 
1541 #undef __FUNCT__
1542 #define __FUNCT__ "MatCopy_MPIAIJ"
1543 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1544 {
1545   PetscErrorCode ierr;
1546   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1547   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1548 
1549   PetscFunctionBegin;
1550   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1551   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1552     /* because of the column compression in the off-processor part of the matrix a->B,
1553        the number of columns in a->B and b->B may be different, hence we cannot call
1554        the MatCopy() directly on the two parts. If need be, we can provide a more
1555        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1556        then copying the submatrices */
1557     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1558   } else {
1559     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1560     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1561   }
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1567 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1568 {
1569   PetscErrorCode ierr;
1570 
1571   PetscFunctionBegin;
1572   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 #include "petscblaslapack.h"
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatAXPY_MPIAIJ"
1579 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1580 {
1581   PetscErrorCode ierr;
1582   PetscInt       i;
1583   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1584   PetscBLASInt   bnz,one=1;
1585   Mat_SeqAIJ     *x,*y;
1586 
1587   PetscFunctionBegin;
1588   if (str == SAME_NONZERO_PATTERN) {
1589     x = (Mat_SeqAIJ *)xx->A->data;
1590     y = (Mat_SeqAIJ *)yy->A->data;
1591     bnz = (PetscBLASInt)x->nz;
1592     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1593     x = (Mat_SeqAIJ *)xx->B->data;
1594     y = (Mat_SeqAIJ *)yy->B->data;
1595     bnz = (PetscBLASInt)x->nz;
1596     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1597   } else if (str == SUBSET_NONZERO_PATTERN) {
1598     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1599 
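    /* y->xtoy[i] records, for each stored nonzero i of X's off-diagonal block, the position of
       the matching nonzero in Y's off-diagonal block (matched through the global column numbers
       in the two garray maps), so the update below can be applied entry by entry. */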
1600     x = (Mat_SeqAIJ *)xx->B->data;
1601     y = (Mat_SeqAIJ *)yy->B->data;
1602     if (y->xtoy && y->XtoY != xx->B) {
1603       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1604       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1605     }
1606     if (!y->xtoy) { /* get xtoy */
1607       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1608       y->XtoY = xx->B;
1609     }
1610     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1611   } else {
1612     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1613   }
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 /* -------------------------------------------------------------------*/
1618 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1619        MatGetRow_MPIAIJ,
1620        MatRestoreRow_MPIAIJ,
1621        MatMult_MPIAIJ,
1622 /* 4*/ MatMultAdd_MPIAIJ,
1623        MatMultTranspose_MPIAIJ,
1624        MatMultTransposeAdd_MPIAIJ,
1625        0,
1626        0,
1627        0,
1628 /*10*/ 0,
1629        0,
1630        0,
1631        MatRelax_MPIAIJ,
1632        MatTranspose_MPIAIJ,
1633 /*15*/ MatGetInfo_MPIAIJ,
1634        MatEqual_MPIAIJ,
1635        MatGetDiagonal_MPIAIJ,
1636        MatDiagonalScale_MPIAIJ,
1637        MatNorm_MPIAIJ,
1638 /*20*/ MatAssemblyBegin_MPIAIJ,
1639        MatAssemblyEnd_MPIAIJ,
1640        0,
1641        MatSetOption_MPIAIJ,
1642        MatZeroEntries_MPIAIJ,
1643 /*25*/ MatZeroRows_MPIAIJ,
1644        0,
1645        0,
1646        0,
1647        0,
1648 /*30*/ MatSetUpPreallocation_MPIAIJ,
1649        0,
1650        0,
1651        0,
1652        0,
1653 /*35*/ MatDuplicate_MPIAIJ,
1654        0,
1655        0,
1656        0,
1657        0,
1658 /*40*/ MatAXPY_MPIAIJ,
1659        MatGetSubMatrices_MPIAIJ,
1660        MatIncreaseOverlap_MPIAIJ,
1661        MatGetValues_MPIAIJ,
1662        MatCopy_MPIAIJ,
1663 /*45*/ MatPrintHelp_MPIAIJ,
1664        MatScale_MPIAIJ,
1665        0,
1666        0,
1667        0,
1668 /*50*/ MatSetBlockSize_MPIAIJ,
1669        0,
1670        0,
1671        0,
1672        0,
1673 /*55*/ MatFDColoringCreate_MPIAIJ,
1674        0,
1675        MatSetUnfactored_MPIAIJ,
1676        0,
1677        0,
1678 /*60*/ MatGetSubMatrix_MPIAIJ,
1679        MatDestroy_MPIAIJ,
1680        MatView_MPIAIJ,
1681        MatGetPetscMaps_Petsc,
1682        0,
1683 /*65*/ 0,
1684        0,
1685        0,
1686        0,
1687        0,
1688 /*70*/ 0,
1689        0,
1690        MatSetColoring_MPIAIJ,
1691 #if defined(PETSC_HAVE_ADIC)
1692        MatSetValuesAdic_MPIAIJ,
1693 #else
1694        0,
1695 #endif
1696        MatSetValuesAdifor_MPIAIJ,
1697 /*75*/ 0,
1698        0,
1699        0,
1700        0,
1701        0,
1702 /*80*/ 0,
1703        0,
1704        0,
1705        0,
1706 /*84*/ MatLoad_MPIAIJ,
1707        0,
1708        0,
1709        0,
1710        0,
1711        0,
1712 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1713        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1714        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1715        MatPtAP_MPIAIJ_MPIAIJ,
1716        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1717 /*95*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
1718        0,
1719        0,
1720        0};
1721 
1722 /* ----------------------------------------------------------------------------------------*/
1723 
1724 EXTERN_C_BEGIN
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1727 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
1728 {
1729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1730   PetscErrorCode ierr;
1731 
1732   PetscFunctionBegin;
1733   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1734   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1735   PetscFunctionReturn(0);
1736 }
1737 EXTERN_C_END
1738 
1739 EXTERN_C_BEGIN
1740 #undef __FUNCT__
1741 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1742 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
1743 {
1744   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1745   PetscErrorCode ierr;
1746 
1747   PetscFunctionBegin;
1748   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1749   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1750   PetscFunctionReturn(0);
1751 }
1752 EXTERN_C_END
1753 
1754 #include "petscpc.h"
1755 EXTERN_C_BEGIN
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1758 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1759 {
1760   Mat_MPIAIJ     *b;
1761   PetscErrorCode ierr;
1762   PetscInt       i;
1763 
1764   PetscFunctionBegin;
1765   B->preallocated = PETSC_TRUE;
1766   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1767   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1768   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1769   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1770   if (d_nnz) {
1771     for (i=0; i<B->m; i++) {
1772       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1773     }
1774   }
1775   if (o_nnz) {
1776     for (i=0; i<B->m; i++) {
1777       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1778     }
1779   }
1780   b = (Mat_MPIAIJ*)B->data;
1781   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1782   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1783 
1784   PetscFunctionReturn(0);
1785 }
1786 EXTERN_C_END
1787 
1788 #undef __FUNCT__
1789 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1790 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1791 {
1792   Mat            mat;
1793   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1794   PetscErrorCode ierr;
1795 
1796   PetscFunctionBegin;
1797   *newmat       = 0;
1798   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1799   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1800   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1801   a    = (Mat_MPIAIJ*)mat->data;
1802 
1803   mat->factor       = matin->factor;
1804   mat->bs           = matin->bs;
1805   mat->assembled    = PETSC_TRUE;
1806   mat->insertmode   = NOT_SET_VALUES;
1807   mat->preallocated = PETSC_TRUE;
1808 
1809   a->rstart       = oldmat->rstart;
1810   a->rend         = oldmat->rend;
1811   a->cstart       = oldmat->cstart;
1812   a->cend         = oldmat->cend;
1813   a->size         = oldmat->size;
1814   a->rank         = oldmat->rank;
1815   a->donotstash   = oldmat->donotstash;
1816   a->roworiented  = oldmat->roworiented;
1817   a->rowindices   = 0;
1818   a->rowvalues    = 0;
1819   a->getrowactive = PETSC_FALSE;
1820 
1821   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1822   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1823   if (oldmat->colmap) {
1824 #if defined (PETSC_USE_CTABLE)
1825     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1826 #else
1827     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1828     PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));
1829     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1830 #endif
1831   } else a->colmap = 0;
1832   if (oldmat->garray) {
1833     PetscInt len;
1834     len  = oldmat->B->n;
1835     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1836     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1837     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1838   } else a->garray = 0;
1839 
1840   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1841   PetscLogObjectParent(mat,a->lvec);
1842   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1843   PetscLogObjectParent(mat,a->Mvctx);
1844   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1845   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1846   PetscLogObjectParent(mat,a->A);
1847   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1848   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1849   PetscLogObjectParent(mat,a->B);
1850   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1851   *newmat = mat;
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 #include "petscsys.h"
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatLoad_MPIAIJ"
1859 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1860 {
1861   Mat            A;
1862   PetscScalar    *vals,*svals;
1863   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1864   MPI_Status     status;
1865   PetscErrorCode ierr;
1866   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1867   PetscInt       i,nz,j,rstart,rend;
1868   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1869   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1870   PetscInt       cend,cstart,n,*rowners;
1871   int            fd;
1872 
1873   PetscFunctionBegin;
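  /* Process 0 reads the PETSc binary file and distributes its contents: the header
     (cookie, M, N, ...) is broadcast, the rows are split roughly evenly among the processes,
     and the row lengths, column indices and numerical values are read on process 0 and
     shipped to their owning processes with MPI_Send/MPI_Recv. */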
1874   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1875   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1876   if (!rank) {
1877     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1878     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1879     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object in file");
1880   }
1881 
1882   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1883   M = header[1]; N = header[2];
1884   /* determine ownership of all rows */
1885   m    = M/size + ((M % size) > rank);
1886   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1887   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1888   rowners[0] = 0;
1889   for (i=2; i<=size; i++) {
1890     rowners[i] += rowners[i-1];
1891   }
1892   rstart = rowners[rank];
1893   rend   = rowners[rank+1];
1894 
1895   /* distribute row lengths to all processors */
1896   ierr    = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
1897   if (!rank) {
1898     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1899     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1900     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1901     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1902     for (j=0; j<m; j++) {
1903       procsnz[0] += ourlens[j];
1904     }
1905     for (i=1; i<size; i++) {
1906       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1907       /* calculate the number of nonzeros on each processor */
1908       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1909         procsnz[i] += rowlengths[j];
1910       }
1911       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1912     }
1913     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1914   } else {
1915     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1916   }
1917 
1918   if (!rank) {
1919     /* determine max buffer needed and allocate it */
1920     maxnz = 0;
1921     for (i=0; i<size; i++) {
1922       maxnz = PetscMax(maxnz,procsnz[i]);
1923     }
1924     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1925 
1926     /* read in my part of the matrix column indices  */
1927     nz   = procsnz[0];
1928     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1929     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1930 
1931     /* read in everyone else's column indices and ship them off */
1932     for (i=1; i<size; i++) {
1933       nz   = procsnz[i];
1934       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1935       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1936     }
1937     ierr = PetscFree(cols);CHKERRQ(ierr);
1938   } else {
1939     /* determine buffer space needed for message */
1940     nz = 0;
1941     for (i=0; i<m; i++) {
1942       nz += ourlens[i];
1943     }
1944     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1945 
1946     /* receive message of column indices */
1947     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1948     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1949     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1950   }
1951 
1952   /* determine column ownership if matrix is not square */
1953   if (N != M) {
1954     n      = N/size + ((N % size) > rank);
1955     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1956     cstart = cend - n;
1957   } else {
1958     cstart = rstart;
1959     cend   = rend;
1960     n      = cend - cstart;
1961   }
1962 
1963   /* loop over local rows, determining number of off diagonal entries */
1964   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1965   jj = 0;
1966   for (i=0; i<m; i++) {
1967     for (j=0; j<ourlens[i]; j++) {
1968       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1969       jj++;
1970     }
1971   }
1972 
1973   /* create our matrix */
1974   for (i=0; i<m; i++) {
1975     ourlens[i] -= offlens[i];
1976   }
1977   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
1978   ierr = MatSetType(A,type);CHKERRQ(ierr);
1979   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
1980 
1981   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1982   for (i=0; i<m; i++) {
1983     ourlens[i] += offlens[i];
1984   }
1985 
1986   if (!rank) {
1987     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1988 
1989     /* read in my part of the matrix numerical values  */
1990     nz   = procsnz[0];
1991     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1992 
1993     /* insert into matrix */
1994     jj      = rstart;
1995     smycols = mycols;
1996     svals   = vals;
1997     for (i=0; i<m; i++) {
1998       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1999       smycols += ourlens[i];
2000       svals   += ourlens[i];
2001       jj++;
2002     }
2003 
2004     /* read in the other processors' values and ship them out */
2005     for (i=1; i<size; i++) {
2006       nz   = procsnz[i];
2007       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2008       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2009     }
2010     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2011   } else {
2012     /* receive numeric values */
2013     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2014 
2015     /* receive message of values */
2016     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2017     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2018     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2019 
2020     /* insert into matrix */
2021     jj      = rstart;
2022     smycols = mycols;
2023     svals   = vals;
2024     for (i=0; i<m; i++) {
2025       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2026       smycols += ourlens[i];
2027       svals   += ourlens[i];
2028       jj++;
2029     }
2030   }
2031   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2032   ierr = PetscFree(vals);CHKERRQ(ierr);
2033   ierr = PetscFree(mycols);CHKERRQ(ierr);
2034   ierr = PetscFree(rowners);CHKERRQ(ierr);
2035 
2036   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2037   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2038   *newmat = A;
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2044 /*
2045     Not great since it makes two copies of the submatrix: first a sequential (SeqAIJ)
2046   copy on each process, and then the final result obtained by concatenating the local matrices.
2047   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2048 */
2049 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2050 {
2051   PetscErrorCode ierr;
2052   PetscMPIInt    rank,size;
2053   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2054   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2055   Mat            *local,M,Mreuse;
2056   PetscScalar    *vwork,*aa;
2057   MPI_Comm       comm = mat->comm;
2058   Mat_SeqAIJ     *aij;
2059 
2060 
2061   PetscFunctionBegin;
2062   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2063   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2064 
2065   if (call ==  MAT_REUSE_MATRIX) {
2066     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2067     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2068     local = &Mreuse;
2069     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2070   } else {
2071     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2072     Mreuse = *local;
2073     ierr   = PetscFree(local);CHKERRQ(ierr);
2074   }
2075 
2076   /*
2077       m - number of local rows
2078       n - number of columns (same on all processors)
2079       rstart - first row in new global matrix generated
2080   */
2081   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2082   if (call == MAT_INITIAL_MATRIX) {
2083     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2084     ii  = aij->i;
2085     jj  = aij->j;
2086 
2087     /*
2088         Determine the number of non-zeros in the diagonal and off-diagonal
2089         portions of the matrix in order to do correct preallocation
2090     */
2091 
2092     /* first get start and end of "diagonal" columns */
2093     if (csize == PETSC_DECIDE) {
2094       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2095       if (mglobal == n) { /* square matrix */
2096         nlocal = m;
2097       } else {
2098         nlocal = n/size + ((n % size) > rank);
2099       }
2100     } else {
2101       nlocal = csize;
2102     }
2103     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2104     rstart = rend - nlocal;
2105     if (rank == size - 1 && rend != n) {
2106       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2107     }
2108 
2109     /* next, compute all the lengths */
2110     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2111     olens = dlens + m;
2112     for (i=0; i<m; i++) {
2113       jend = ii[i+1] - ii[i];
2114       olen = 0;
2115       dlen = 0;
2116       for (j=0; j<jend; j++) {
2117         if (*jj < rstart || *jj >= rend) olen++;
2118         else dlen++;
2119         jj++;
2120       }
2121       olens[i] = olen;
2122       dlens[i] = dlen;
2123     }
2124     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2125     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2126     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2127     ierr = PetscFree(dlens);CHKERRQ(ierr);
2128   } else {
2129     PetscInt ml,nl;
2130 
2131     M = *newmat;
2132     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2133     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2134     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2135     /*
2136          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2137        rather than the slower MatSetValues().
2138     */
2139     M->was_assembled = PETSC_TRUE;
2140     M->assembled     = PETSC_FALSE;
2141   }
2142   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2143   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2144   ii  = aij->i;
2145   jj  = aij->j;
2146   aa  = aij->a;
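  /* copy the rows of the local (sequential) submatrix into the parallel matrix; local row i
     becomes global row rstart+i */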
2147   for (i=0; i<m; i++) {
2148     row   = rstart + i;
2149     nz    = ii[i+1] - ii[i];
2150     cwork = jj;     jj += nz;
2151     vwork = aa;     aa += nz;
2152     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2153   }
2154 
2155   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2156   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2157   *newmat = M;
2158 
2159   /* save submatrix used in processor for next request */
2160   if (call ==  MAT_INITIAL_MATRIX) {
2161     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2162     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2163   }
2164 
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 EXTERN_C_BEGIN
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2171 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2172 {
2173   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2174   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2175   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2176   const PetscInt *JJ;
2177   PetscScalar    *values;
2178   PetscErrorCode ierr;
2179 
2180   PetscFunctionBegin;
2181 #if defined(PETSC_OPT_g)
2182   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2183 #endif
2184   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2185   o_nnz = d_nnz + m;
2186 
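  /* For each local row, count how many column indices fall in [cstart,cend): these become the
     diagonal-block counts d_nnz[i]; the remaining entries of the row are off-diagonal (o_nnz[i]).
     This relies on the column indices of each row being sorted. */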
2187   for (i=0; i<m; i++) {
2188     nnz     = I[i+1]- I[i];
2189     JJ      = J + I[i];
2190     nnz_max = PetscMax(nnz_max,nnz);
2191 #if defined(PETSC_OPT_g)
2192     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2193 #endif
2194     for (j=0; j<nnz; j++) {
2195       if (*JJ >= cstart) break;
2196       JJ++;
2197     }
2198     d = 0;
2199     for (; j<nnz; j++) {
2200       if (*JJ++ >= cend) break;
2201       d++;
2202     }
2203     d_nnz[i] = d;
2204     o_nnz[i] = nnz - d;
2205   }
2206   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2207   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2208 
2209   if (v) values = (PetscScalar*)v;
2210   else {
2211     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2212     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2213   }
2214 
2215   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2216   for (i=0; i<m; i++) {
2217     ii   = i + rstart;
2218     nnz  = I[i+1]- I[i];
2219     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2220   }
2221   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2222   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2223   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2224 
2225   if (!v) {
2226     ierr = PetscFree(values);CHKERRQ(ierr);
2227   }
2228   PetscFunctionReturn(0);
2229 }
2230 EXTERN_C_END
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2234 /*@C
2235    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2236    (the default parallel PETSc format).
2237 
2238    Collective on MPI_Comm
2239 
2240    Input Parameters:
2241 +  A - the matrix
2242 .  i - the indices into j for the start of each local row (starts with zero)
2243 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2244 -  v - optional values in the matrix
2245 
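   Example usage:

   A minimal call sketch (the CSR arrays i, j and v here are hypothetical; they must describe
   only the locally owned rows, using global column indices):

.vb
   MatCreate(comm,m,n,M,N,&A);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocationCSR(A,i,j,v);
.ve

   As implemented above, this both preallocates the matrix and inserts the values (or zeros,
   if v is PETSC_NULL) and assembles it, so no further MatSetValues()/MatAssemblyBegin()
   calls are needed.
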
2246    Level: developer
2247 
2248 .keywords: matrix, aij, compressed row, sparse, parallel
2249 
2250 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2251 @*/
2252 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2253 {
2254   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2255 
2256   PetscFunctionBegin;
2257   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2258   if (f) {
2259     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2260   }
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2266 /*@C
2267    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2268    (the default parallel PETSc format).  For good matrix assembly performance
2269    the user should preallocate the matrix storage by setting the parameters
2270    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2271    performance can be increased by more than a factor of 50.
2272 
2273    Collective on MPI_Comm
2274 
2275    Input Parameters:
2276 +  A - the matrix
2277 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2278            (same value is used for all local rows)
2279 .  d_nnz - array containing the number of nonzeros in the various rows of the
2280            DIAGONAL portion of the local submatrix (possibly different for each row)
2281            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2282            The size of this array is equal to the number of local rows, i.e. 'm'.
2283            You must leave room for the diagonal entry even if it is zero.
2284 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2285            submatrix (same value is used for all local rows).
2286 -  o_nnz - array containing the number of nonzeros in the various rows of the
2287            OFF-DIAGONAL portion of the local submatrix (possibly different for
2288            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2289            structure. The size of this array is equal to the number
2290            of local rows, i.e. 'm'.
2291 
2292    If the *_nnz parameter is given then the *_nz parameter is ignored
2293 
2294    The AIJ format (also called the Yale sparse matrix format or
2295    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2296    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2297 
2298    The parallel matrix is partitioned such that the first m0 rows belong to
2299    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2300    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2301 
2302    The DIAGONAL portion of the local submatrix of a processor can be defined
2303    as the submatrix which is obtained by extracting the part corresponding
2304    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2305    first row that belongs to the processor, and r2 is the last row belonging
2306    to this processor. This is a square mxm matrix. The remaining portion
2307    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2308 
2309    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2310 
2311    Example usage:
2312 
2313    Consider the following 8x8 matrix with 34 nonzero values that is
2314    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2315    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2316    as follows:
2317 
2318 .vb
2319             1  2  0  |  0  3  0  |  0  4
2320     Proc0   0  5  6  |  7  0  0  |  8  0
2321             9  0 10  | 11  0  0  | 12  0
2322     -------------------------------------
2323            13  0 14  | 15 16 17  |  0  0
2324     Proc1   0 18  0  | 19 20 21  |  0  0
2325             0  0  0  | 22 23  0  | 24  0
2326     -------------------------------------
2327     Proc2  25 26 27  |  0  0 28  | 29  0
2328            30  0  0  | 31 32 33  |  0 34
2329 .ve
2330 
2331    This can be represented as a collection of submatrices as:
2332 
2333 .vb
2334       A B C
2335       D E F
2336       G H I
2337 .ve
2338 
2339    Where the submatrices A,B,C are owned by proc0, D,E,F are
2340    owned by proc1, G,H,I are owned by proc2.
2341 
2342    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2343    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2344    The 'M','N' parameters are 8,8, and have the same values on all procs.
2345 
2346    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2347    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2348    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2349    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2350    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2351    matrix, and [DF] as another SeqAIJ matrix.
2352 
2353    When d_nz, o_nz parameters are specified, d_nz storage elements are
2354    allocated for every row of the local diagonal submatrix, and o_nz
2355    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2356    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2357    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices respectively.
2358    In this case, the values of d_nz,o_nz are:
2359 .vb
2360      proc0 : d_nz = 2, o_nz = 2
2361      proc1 : d_nz = 3, o_nz = 2
2362      proc2 : d_nz = 1, o_nz = 4
2363 .ve
2364    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2365    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2366    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2367    34 values.
2368 
2369    When d_nnz, o_nnz parameters are specified, the storage is specified
2370    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2371    In the above case the values for d_nnz,o_nnz are:
2372 .vb
2373      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2374      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2375      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2376 .ve
2377    Here the space allocated is the sum of all the above values, i.e. 34, and
2378    hence the preallocation is perfect.
2379 
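   A call sketch for proc0 in the example above (the matrix A is assumed to have already been
   created with MatCreate() and MatSetType(A,MATMPIAIJ); the arrays repeat the d_nnz/o_nnz
   values listed above, and the scalar d_nz/o_nz arguments are then ignored):

.vb
   PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve
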
2380    Level: intermediate
2381 
2382 .keywords: matrix, aij, compressed row, sparse, parallel
2383 
2384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2385           MPIAIJ
2386 @*/
2387 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2388 {
2389   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2390 
2391   PetscFunctionBegin;
2392   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2393   if (f) {
2394     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2395   }
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatCreateMPIAIJ"
2401 /*@C
2402    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2403    (the default parallel PETSc format).  For good matrix assembly performance
2404    the user should preallocate the matrix storage by setting the parameters
2405    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2406    performance can be increased by more than a factor of 50.
2407 
2408    Collective on MPI_Comm
2409 
2410    Input Parameters:
2411 +  comm - MPI communicator
2412 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2413            This value should be the same as the local size used in creating the
2414            y vector for the matrix-vector product y = Ax.
2415 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2416        This value should be the same as the local size used in creating the
2417        x vector for the matrix-vector product y = Ax. For square matrices n is almost always m.
2418 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2419 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2420 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2421            (same value is used for all local rows)
2422 .  d_nnz - array containing the number of nonzeros in the various rows of the
2423            DIAGONAL portion of the local submatrix (possibly different for each row)
2424            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2425            The size of this array is equal to the number of local rows, i.e. 'm'.
2426            You must leave room for the diagonal entry even if it is zero.
2427 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2428            submatrix (same value is used for all local rows).
2429 -  o_nnz - array containing the number of nonzeros in the various rows of the
2430            OFF-DIAGONAL portion of the local submatrix (possibly different for
2431            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2432            structure. The size of this array is equal to the number
2433            of local rows, i.e. 'm'.
2434 
2435    Output Parameter:
2436 .  A - the matrix
2437 
2438    Notes:
2439    If the *_nnz parameter is given then the *_nz parameter is ignored
2440 
2441    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2442    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2443    storage requirements for this matrix.
2444 
2445    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2446    processor then it must be used on all processors that share the object for
2447    that argument.
2448 
2449    The user MUST specify either the local or global matrix dimensions
2450    (possibly both).
2451 
2452    The parallel matrix is partitioned such that the first m0 rows belong to
2453    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2454    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2455 
2456    The DIAGONAL portion of the local submatrix of a processor can be defined
2457    as the submatrix which is obtained by extracting the part corresponding
2458    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2459    first row that belongs to the processor, and r2 is the last row belonging
2460    to this processor. This is a square mxm matrix. The remaining portion
2461    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2462 
2463    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2464 
2465    When calling this routine with a single process communicator, a matrix of
2466    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2467    type of communicator, use the construction mechanism:
2468      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2469 
2470    By default, this format uses inodes (identical nodes) when possible.
2471    We search for consecutive rows with the same nonzero structure, thereby
2472    reusing matrix information to achieve increased efficiency.
2473 
2474    Options Database Keys:
2475 +  -mat_aij_no_inode  - Do not use inodes
2476 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2477 -  -mat_aij_oneindex - Internally use indexing starting at 1
2478         rather than 0.  Note that when calling MatSetValues(),
2479         the user still MUST index entries starting at 0!
2480 
2481 
2482    Example usage:
2483 
2484    Consider the following 8x8 matrix with 34 nonzero values that is
2485    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2486    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2487    as follows:
2488 
2489 .vb
2490             1  2  0  |  0  3  0  |  0  4
2491     Proc0   0  5  6  |  7  0  0  |  8  0
2492             9  0 10  | 11  0  0  | 12  0
2493     -------------------------------------
2494            13  0 14  | 15 16 17  |  0  0
2495     Proc1   0 18  0  | 19 20 21  |  0  0
2496             0  0  0  | 22 23  0  | 24  0
2497     -------------------------------------
2498     Proc2  25 26 27  |  0  0 28  | 29  0
2499            30  0  0  | 31 32 33  |  0 34
2500 .ve
2501 
2502    This can be represented as a collection of submatrices as:
2503 
2504 .vb
2505       A B C
2506       D E F
2507       G H I
2508 .ve
2509 
2510    Where the submatrices A,B,C are owned by proc0, D,E,F are
2511    owned by proc1, G,H,I are owned by proc2.
2512 
2513    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2514    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2515    The 'M','N' parameters are 8,8, and have the same values on all procs.
2516 
2517    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2518    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2519    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2520    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2521    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2522    matrix, and [DF] as another SeqAIJ matrix.
2523 
2524    When d_nz, o_nz parameters are specified, d_nz storage elements are
2525    allocated for every row of the local diagonal submatrix, and o_nz
2526    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2527    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2528    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices respectively.
2529    In this case, the values of d_nz,o_nz are:
2530 .vb
2531      proc0 : d_nz = 2, o_nz = 2
2532      proc1 : d_nz = 3, o_nz = 2
2533      proc2 : d_nz = 1, o_nz = 4
2534 .ve
2535    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2536    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2537    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2538    34 values.
2539 
2540    When d_nnz, o_nnz parameters are specified, the storage is specified
2541    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2542    In the above case the values for d_nnz,o_nnz are:
2543 .vb
2544      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2545      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2546      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2547 .ve
2548    Here the space allocated is the sum of all the above values, i.e. 34, and
2549    hence the preallocation is perfect.
2550 
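   A minimal creation sketch (square matrix with the same row and column layout; the row
   estimates 5 and 2 are illustrative only and should be replaced by accurate counts):

.vb
   MatCreateMPIAIJ(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);
   /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve
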
2551    Level: intermediate
2552 
2553 .keywords: matrix, aij, compressed row, sparse, parallel
2554 
2555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2556           MPIAIJ
2557 @*/
2558 PetscErrorCode MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2559 {
2560   PetscErrorCode ierr;
2561   PetscMPIInt    size;
2562 
2563   PetscFunctionBegin;
2564   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2565   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2566   if (size > 1) {
2567     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2568     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2569   } else {
2570     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2571     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2572   }
2573   PetscFunctionReturn(0);
2574 }
2575 
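/*
    MatMPIAIJGetSeqAIJ - gives access to the two local SeqAIJ blocks of an MPIAIJ matrix:
  Ad is the diagonal block, Ao the off-diagonal block, and colmap the garray that maps the
  compressed local column numbers of Ao back to global column numbers.
*/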
2576 #undef __FUNCT__
2577 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2578 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2579 {
2580   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2581 
2582   PetscFunctionBegin;
2583   *Ad     = a->A;
2584   *Ao     = a->B;
2585   *colmap = a->garray;
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 #undef __FUNCT__
2590 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2591 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2592 {
2593   PetscErrorCode ierr;
2594   PetscInt       i;
2595   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2596 
2597   PetscFunctionBegin;
2598   if (coloring->ctype == IS_COLORING_LOCAL) {
2599     ISColoringValue *allcolors,*colors;
2600     ISColoring      ocoloring;
2601 
2602     /* set coloring for diagonal portion */
2603     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2604 
2605     /* set coloring for off-diagonal portion */
2606     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2607     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2608     for (i=0; i<a->B->n; i++) {
2609       colors[i] = allcolors[a->garray[i]];
2610     }
2611     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2612     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2613     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2614     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2615   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2616     ISColoringValue *colors;
2617     PetscInt        *larray;
2618     ISColoring      ocoloring;
2619 
2620     /* set coloring for diagonal portion */
2621     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2622     for (i=0; i<a->A->n; i++) {
2623       larray[i] = i + a->cstart;
2624     }
2625     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2626     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2627     for (i=0; i<a->A->n; i++) {
2628       colors[i] = coloring->colors[larray[i]];
2629     }
2630     ierr = PetscFree(larray);CHKERRQ(ierr);
2631     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2632     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2633     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2634 
2635     /* set coloring for off-diagonal portion */
2636     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2637     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2638     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2639     for (i=0; i<a->B->n; i++) {
2640       colors[i] = coloring->colors[larray[i]];
2641     }
2642     ierr = PetscFree(larray);CHKERRQ(ierr);
2643     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2644     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2645     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2646   } else {
2647     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2648   }
2649 
2650   PetscFunctionReturn(0);
2651 }
2652 
2653 #if defined(PETSC_HAVE_ADIC)
2654 #undef __FUNCT__
2655 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2656 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2657 {
2658   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2659   PetscErrorCode ierr;
2660 
2661   PetscFunctionBegin;
2662   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2663   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2664   PetscFunctionReturn(0);
2665 }
2666 #endif
2667 
2668 #undef __FUNCT__
2669 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2670 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2671 {
2672   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2673   PetscErrorCode ierr;
2674 
2675   PetscFunctionBegin;
2676   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2677   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatMerge"
2683 /*@C
2684       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2685                  matrices from each processor
2686 
2687     Collective on MPI_Comm
2688 
2689    Input Parameters:
2690 +    comm - the communicator the parallel matrix will live on
2691 .    inmat - the input sequential matrix (one per process)
2692 .    n - number of local columns (or PETSC_DECIDE)
2693 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2694 
2695    Output Parameter:
2696 .    outmat - the parallel matrix generated
2697 
2698     Level: advanced
2699 
2700    Notes: The matrix on EACH processor MUST have the same number of columns.
2701 
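   Example usage:

   A minimal usage sketch (inmat is a sequential matrix built independently on each process;
   note that, as implemented below, MatMerge() destroys inmat):

.vb
   MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);
.ve
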
2702 @*/
2703 PetscErrorCode MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2704 {
2705   PetscErrorCode ierr;
2706   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2707   PetscInt       *indx;
2708   PetscScalar    *values;
2709   PetscMap       columnmap,rowmap;
2710 
2711   PetscFunctionBegin;
2712   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2713   /*
2714   PetscMPIInt       rank;
2715   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2716   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2717   */
2718   if (scall == MAT_INITIAL_MATRIX){
2719     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2720     if (n == PETSC_DECIDE){
2721       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2722       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2723       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2724       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2725       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2726     }
2727 
2728     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2729     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2730     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2731     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2732     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2733 
2734     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2735     for (i=0;i<m;i++) {
2736       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2737       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2738       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2739     }
2740     /* This routine will ONLY return an MPIAIJ type matrix */
2741     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2742     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2743     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2744     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2745 
2746   } else if (scall == MAT_REUSE_MATRIX){
2747     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2748   } else {
2749     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2750   }
2751 
2752   for (i=0;i<m;i++) {
2753     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2754     I    = i + rstart;
2755     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2756     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2757   }
2758   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2759   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2760   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2761 
2762   PetscFunctionReturn(0);
2763 }
2764 
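/*
   Usage sketch (illustrative, not part of this revision): each process builds its own
   SeqAIJ block of rows and MatMerge() stacks them into one parallel matrix; note that
   MatMerge() destroys the input matrix.  The names below are hypothetical.

     Mat localmat,global;
     .. assemble localmat as a MATSEQAIJ on PETSC_COMM_SELF ..
     ierr = MatMerge(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&global);CHKERRQ(ierr);
     .. use global, then ..
     ierr = MatDestroy(global);CHKERRQ(ierr);
*/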
2765 #undef __FUNCT__
2766 #define __FUNCT__ "MatFileSplit"
2767 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2768 {
2769   PetscErrorCode    ierr;
2770   PetscMPIInt       rank;
2771   PetscInt          m,N,i,rstart,nnz;
2772   size_t            len;
2773   const PetscInt    *indx;
2774   PetscViewer       out;
2775   char              *name;
2776   Mat               B;
2777   const PetscScalar *values;
2778 
2779   PetscFunctionBegin;
2780   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2781   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2782   /* Should this be the type of the diagonal block of A? */
2783   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2784   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2785   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2786   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2787   for (i=0;i<m;i++) {
2788     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2789     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2790     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2791   }
2792   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2793   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2794 
2795   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2796   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2797   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2798   sprintf(name,"%s.%d",outfile,rank);
2799   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2800   ierr = PetscFree(name);
2801   ierr = MatView(B,out);CHKERRQ(ierr);
2802   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2803   ierr = MatDestroy(B);CHKERRQ(ierr);
2804   PetscFunctionReturn(0);
2805 }
2806 
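/*
   Usage sketch (illustrative): write each process's rows of A to its own binary file;
   the base name "mymat" is hypothetical and produces files mymat.0, mymat.1, ...

     ierr = MatFileSplit(A,"mymat");CHKERRQ(ierr);
*/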
2807 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
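/*
   Destructor installed by MatMerge_SeqsToMPISymbolic(): it frees the Mat_Merge_SeqsToMPI
   support structure attached to the matrix (when present) and then falls through to the
   ordinary MPIAIJ destructor.
*/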
2808 #undef __FUNCT__
2809 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2810 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2811 {
2812   PetscErrorCode       ierr;
2813   Mat_Merge_SeqsToMPI  *merge;
2814   PetscObjectContainer container;
2815 
2816   PetscFunctionBegin;
2817   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2818   if (container) {
2819     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2820     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2821     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2822     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2823     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2824     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2825     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2826     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2827     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2828     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2829     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2830     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2831 
2832     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2833     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2834     ierr = PetscFree(merge);CHKERRQ(ierr);
2835   }
2836 
2837   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2838   PetscFunctionReturn(0);
2839 }
2840 
2841 #include "src/mat/utils/freespace.h"
2842 #include "petscbt.h"
2843 #undef __FUNCT__
2844 #define __FUNCT__ "MatMerge_SeqsToMPI"
2845 /*@C
2846       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
2847                  matrices from each processor
2848 
2849     Collective on MPI_Comm
2850 
2851    Input Parameters:
2852 +    comm - the communicator the parallel matrix will live on
2853 .    seqmat - the input sequential matrix (one per process)
2854 .    m - number of local rows (or PETSC_DECIDE)
2855 .    n - number of local columns (or PETSC_DECIDE)
2856 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2857 
2858    Output Parameter:
2859 .    mpimat - the parallel matrix generated
2860 
2861     Level: advanced
2862 
2863    Notes:
2864      The dimensions of the sequential matrix in each processor MUST be the same.
2865      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
2866      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2867 @*/
2868 static PetscEvent logkey_seqstompinum = 0;
2869 PetscErrorCode MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2870 {
2871   PetscErrorCode       ierr;
2872   MPI_Comm             comm=mpimat->comm;
2873   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2874   PetscMPIInt          size,rank,taga,*len_s;
2875   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2876   PetscInt             proc,m;
2877   PetscInt             **buf_ri,**buf_rj;
2878   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2879   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2880   MPI_Request          *s_waits,*r_waits;
2881   MPI_Status           *status;
2882   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2883   Mat_Merge_SeqsToMPI  *merge;
2884   PetscObjectContainer container;
2885 
2886   PetscFunctionBegin;
2887   if (!logkey_seqstompinum) {
2888     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2889   }
2890   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2891 
2892   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2893   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2894 
2895   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2896   if (container) {
2897     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2898   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have an attached MatMergeSeqsToMPI container");
2899   bi     = merge->bi;
2900   bj     = merge->bj;
2901   buf_ri = merge->buf_ri;
2902   buf_rj = merge->buf_rj;
2903 
2904   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2905   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2906   len_s  = merge->len_s;
2907 
2908   /* send and recv matrix values */
2909   /*-----------------------------*/
2910   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2911   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2912 
2913   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2914   for (proc=0,k=0; proc<size; proc++){
2915     if (!len_s[proc]) continue;
2916     i = owners[proc];
2917     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2918     k++;
2919   }
2920 
2921   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2922   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2923   ierr = PetscFree(status);CHKERRQ(ierr);
2924 
2925   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2926   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2927 
2928   /* insert mat values of mpimat */
2929   /*----------------------------*/
2930   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2931   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2932   nextrow = buf_ri_k + merge->nrecv;
2933   nextai  = nextrow + merge->nrecv;
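  /* buf_ri_k[k] marks the start of the k-th received i-structure; nextrow[k] and
     nextai[k] walk its row-number list and its row-offset list as the rows of the
     merged matrix are filled in below. */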
2934 
2935   for (k=0; k<merge->nrecv; k++){
2936     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2937     nrows = *(buf_ri_k[k]);
2938     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2939     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
2940   }
2941 
2942   /* set values of ba */
2943   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2944   for (i=0; i<m; i++) {
2945     arow = owners[rank] + i;
2946     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2947     bnzi = bi[i+1] - bi[i];
2948     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2949 
2950     /* add local non-zero vals of this proc's seqmat into ba */
2951     anzi = ai[arow+1] - ai[arow];
2952     aj   = a->j + ai[arow];
2953     aa   = a->a + ai[arow];
2954     nextaj = 0;
2955     for (j=0; nextaj<anzi; j++){
2956       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2957         ba_i[j] += aa[nextaj++];
2958       }
2959     }
2960 
2961     /* add received vals into ba */
2962     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2963       /* i-th row */
2964       if (i == *nextrow[k]) {
2965         anzi = *(nextai[k]+1) - *nextai[k];
2966         aj   = buf_rj[k] + *(nextai[k]);
2967         aa   = abuf_r[k] + *(nextai[k]);
2968         nextaj = 0;
2969         for (j=0; nextaj<anzi; j++){
2970           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2971             ba_i[j] += aa[nextaj++];
2972           }
2973         }
2974         nextrow[k]++; nextai[k]++;
2975       }
2976     }
2977     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
2978   }
2979   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2980   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2981 
2982   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
2983   ierr = PetscFree(ba_i);CHKERRQ(ierr);
2984   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
2985   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2986   PetscFunctionReturn(0);
2987 }
2988 static PetscEvent logkey_seqstompisym = 0;
2989 PetscErrorCode MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
2990 {
2991   PetscErrorCode       ierr;
2992   Mat                  B_mpi;
2993   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2994   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
2995   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
2996   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
2997   PetscInt             len,proc,*dnz,*onz;
2998   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
2999   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3000   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3001   MPI_Status           *status;
3002   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3003   PetscBT              lnkbt;
3004   Mat_Merge_SeqsToMPI  *merge;
3005   PetscObjectContainer container;
3006 
3007   PetscFunctionBegin;
3008   if (!logkey_seqstompisym) {
3009     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3010   }
3011   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3012 
3013   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3014   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3015 
3016   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3017   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3018 
3019   /* determine row ownership */
3020   /*---------------------------------------------------------*/
3021   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3022   if (m == PETSC_DECIDE) {
3023     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3024   } else {
3025     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3026   }
3027   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3028   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3029   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3030 
3031   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3032   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3033 
3034   /* determine the number of messages to send, their lengths */
3035   /*---------------------------------------------------------*/
3036   len_s  = merge->len_s;
3037 
3038   len = 0;  /* length of buf_si[] */
3039   merge->nsend = 0;
3040   for (proc=0; proc<size; proc++){
3041     len_si[proc] = 0;
3042     if (proc == rank){
3043       len_s[proc] = 0;
3044     } else {
3045       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3046       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
3047     }
3048     if (len_s[proc]) {
3049       merge->nsend++;
3050       nrows = 0;
3051       for (i=owners[proc]; i<owners[proc+1]; i++){
3052         if (ai[i+1] > ai[i]) nrows++;
3053       }
3054       len_si[proc] = 2*(nrows+1);
3055       len += len_si[proc];
3056     }
3057   }
3058 
3059   /* determine the number and length of messages to receive for ij-structure */
3060   /*-------------------------------------------------------------------------*/
3061   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3062   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3063 
3064   /* post the Irecv of j-structure */
3065   /*-------------------------------*/
3066   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3067   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3068 
3069   /* post the Isend of j-structure */
3070   /*--------------------------------*/
3071   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3072   sj_waits = si_waits + merge->nsend;
3073 
3074   for (proc=0, k=0; proc<size; proc++){
3075     if (!len_s[proc]) continue;
3076     i = owners[proc];
3077     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3078     k++;
3079   }
3080 
3081   /* receives and sends of j-structure are complete */
3082   /*------------------------------------------------*/
3083   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3084   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3085 
3086   /* send and recv i-structure */
3087   /*---------------------------*/
3088   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3089   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3090 
3091   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3092   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3093   for (proc=0,k=0; proc<size; proc++){
3094     if (!len_s[proc]) continue;
3095     /* form outgoing message for i-structure:
3096          buf_si[0]:                 nrows to be sent
3097                [1:nrows]:           row index (local to the receiving process)
3098                [nrows+1:2*nrows+1]: i-structure index
3099     */
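    /* e.g. (illustrative): sending local rows 3 and 7 with 2 and 5 nonzeros gives
       buf_si = {2, 3, 7, 0, 2, 7} */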
3100     /*-------------------------------------------*/
3101     nrows = len_si[proc]/2 - 1;
3102     buf_si_i    = buf_si + nrows+1;
3103     buf_si[0]   = nrows;
3104     buf_si_i[0] = 0;
3105     nrows = 0;
3106     for (i=owners[proc]; i<owners[proc+1]; i++){
3107       anzi = ai[i+1] - ai[i];
3108       if (anzi) {
3109         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3110         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3111         nrows++;
3112       }
3113     }
3114     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3115     k++;
3116     buf_si += len_si[proc];
3117   }
3118 
3119   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3120   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3121 
3122   ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3123   for (i=0; i<merge->nrecv; i++){
3124     ierr = PetscLogInfo((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3125   }
3126 
3127   ierr = PetscFree(len_si);CHKERRQ(ierr);
3128   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3129   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3130   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3131   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3132   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3133   ierr = PetscFree(status);CHKERRQ(ierr);
3134 
3135   /* compute a local seq matrix in each processor */
3136   /*----------------------------------------------*/
3137   /* allocate bi array and free space for accumulating nonzero column info */
3138   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3139   bi[0] = 0;
3140 
3141   /* create and initialize a linked list */
3142   nlnk = N+1;
3143   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
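  /* lnk[] holds a sorted linked list of column indices in 0..N-1 and lnkbt is the
     companion bit table marking which columns are already present, so the PetscLLAdd()
     calls below merge the local and received rows without duplicate entries. */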
3144 
3145   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3146   len = 0;
3147   len  = ai[owners[rank+1]] - ai[owners[rank]];
3148   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3149   current_space = free_space;
3150 
3151   /* determine symbolic info for each local row */
3152   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3153   nextrow = buf_ri_k + merge->nrecv;
3154   nextai  = nextrow + merge->nrecv;
3155   for (k=0; k<merge->nrecv; k++){
3156     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3157     nrows = *buf_ri_k[k];
3158     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3159     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3160   }
3161 
3162   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3163   len = 0;
3164   for (i=0;i<m;i++) {
3165     bnzi   = 0;
3166     /* add local non-zero cols of this proc's seqmat into lnk */
3167     arow   = owners[rank] + i;
3168     anzi   = ai[arow+1] - ai[arow];
3169     aj     = a->j + ai[arow];
3170     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3171     bnzi += nlnk;
3172     /* add received col data into lnk */
3173     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3174       if (i == *nextrow[k]) { /* i-th row */
3175         anzi = *(nextai[k]+1) - *nextai[k];
3176         aj   = buf_rj[k] + *nextai[k];
3177         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3178         bnzi += nlnk;
3179         nextrow[k]++; nextai[k]++;
3180       }
3181     }
3182     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3183 
3184     /* if free space is not available, make more free space */
3185     if (current_space->local_remaining<bnzi) {
3186       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3187       nspacedouble++;
3188     }
3189     /* copy data into free space, then initialize lnk */
3190     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3191     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3192 
3193     current_space->array           += bnzi;
3194     current_space->local_used      += bnzi;
3195     current_space->local_remaining -= bnzi;
3196 
3197     bi[i+1] = bi[i] + bnzi;
3198   }
3199 
3200   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3201 
3202   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3203   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3204   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3205 
3206   /* create symbolic parallel matrix B_mpi */
3207   /*---------------------------------------*/
3208   if (n==PETSC_DECIDE) {
3209     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3210   } else {
3211     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3212   }
3213   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3214   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3215   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3216 
3217   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3218   B_mpi->assembled     = PETSC_FALSE;
3219   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3220   merge->bi            = bi;
3221   merge->bj            = bj;
3222   merge->buf_ri        = buf_ri;
3223   merge->buf_rj        = buf_rj;
3224   merge->coi           = PETSC_NULL;
3225   merge->coj           = PETSC_NULL;
3226   merge->owners_co     = PETSC_NULL;
3227 
3228   /* attach the supporting struct to B_mpi for reuse */
3229   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3230   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3231   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3232   *mpimat = B_mpi;
3233   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3234   PetscFunctionReturn(0);
3235 }
3236 
3237 static PetscEvent logkey_seqstompi = 0;
3238 PetscErrorCode MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3239 {
3240   PetscErrorCode   ierr;
3241 
3242   PetscFunctionBegin;
3243   if (!logkey_seqstompi) {
3244     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3245   }
3246   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3247   if (scall == MAT_INITIAL_MATRIX){
3248     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3249   }
3250   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3251   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3252   PetscFunctionReturn(0);
3253 }
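/*
   Usage sketch (illustrative): every process contributes a SeqAIJ matrix of the same
   global size; the entries are summed into one MPIAIJ matrix.  The symbolic data is
   kept on the result, so a second call with MAT_REUSE_MATRIX only redoes the numerics.
   The names seqpart and C are hypothetical.

     Mat seqpart,C;
     .. build seqpart on PETSC_COMM_SELF with the global size M x N ..
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqpart,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     .. change the values of seqpart (same nonzero pattern), then ..
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqpart,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/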
3254 static PetscEvent logkey_getlocalmat = 0;
3255 #undef __FUNCT__
3256 #define __FUNCT__ "MatGetLocalMat"
3257 /*@C
3258      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3259 
3260     Not Collective
3261 
3262    Input Parameters:
3263 +    A - the matrix
3264 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3265 
3266    Output Parameter:
3267 .    A_loc - the local sequential matrix generated
3268 
3269     Level: developer
3270 
3271 @*/
3272 PetscErrorCode MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3273 {
3274   PetscErrorCode  ierr;
3275   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3276   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3277   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3278   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3279   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3280   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3281 
3282   PetscFunctionBegin;
3283   if (!logkey_getlocalmat) {
3284     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3285   }
3286   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
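  /* Merge the diagonal block (mpimat->A) and off-diagonal block (mpimat->B) CSR data
     row by row into one SeqAIJ structure, translating local column numbers back to the
     global numbering via cstart and cmap[]. */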
3287   if (scall == MAT_INITIAL_MATRIX){
3288     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3289     ci[0] = 0;
3290     for (i=0; i<am; i++){
3291       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3292     }
3293     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3294     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3295     k = 0;
3296     for (i=0; i<am; i++) {
3297       ncols_o = bi[i+1] - bi[i];
3298       ncols_d = ai[i+1] - ai[i];
3299       /* off-diagonal portion of A: columns to the left of the diagonal block */
3300       for (jo=0; jo<ncols_o; jo++) {
3301         col = cmap[*bj];
3302         if (col >= cstart) break;
3303         cj[k]   = col; bj++;
3304         ca[k++] = *ba++;
3305       }
3306       /* diagonal portion of A */
3307       for (j=0; j<ncols_d; j++) {
3308         cj[k]   = cstart + *aj++;
3309         ca[k++] = *aa++;
3310       }
3311       /* off-diagonal portion of A: columns to the right of the diagonal block */
3312       for (j=jo; j<ncols_o; j++) {
3313         cj[k]   = cmap[*bj++];
3314         ca[k++] = *ba++;
3315       }
3316     }
3317     /* put together the new matrix */
3318     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3319     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3320     /* Since these are PETSc arrays, change flags to free them as necessary. */
3321     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3322     mat->freedata = PETSC_TRUE;
3323     mat->nonew    = 0;
3324   } else if (scall == MAT_REUSE_MATRIX){
3325     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3326     ci = mat->i; cj = mat->j; ca = mat->a;
3327     for (i=0; i<am; i++) {
3328       /* off-diagonal portion of A: columns to the left of the diagonal block */
3329       ncols_o = bi[i+1] - bi[i];
3330       for (jo=0; jo<ncols_o; jo++) {
3331         col = cmap[*bj];
3332         if (col >= cstart) break;
3333         *ca++ = *ba++; bj++;
3334       }
3335       /* diagonal portion of A */
3336       ncols_d = ai[i+1] - ai[i];
3337       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3338       /* off-diagonal portion of A: columns to the right of the diagonal block */
3339       for (j=jo; j<ncols_o; j++) {
3340         *ca++ = *ba++; bj++;
3341       }
3342     }
3343   } else {
3344     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3345   }
3346 
3347   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3348   PetscFunctionReturn(0);
3349 }
3350 
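/*
   Usage sketch (illustrative): grab this process's rows of a parallel MPIAIJ matrix
   as a sequential matrix that keeps the global column numbering; Aloc is hypothetical.

     Mat Aloc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     .. after the values of A change (same nonzero pattern) ..
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/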
3351 static PetscEvent logkey_getlocalmatcondensed = 0;
3352 #undef __FUNCT__
3353 #define __FUNCT__ "MatGetLocalMatCondensed"
3354 /*@C
3355      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
3356 
3357     Not Collective
3358 
3359    Input Parameters:
3360 +    A - the matrix
3361 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3362 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3363 
3364    Output Parameter:
3365 .    A_loc - the local sequential matrix generated
3366 
3367     Level: developer
3368 
3369 @*/
3370 PetscErrorCode MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3371 {
3372   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3373   PetscErrorCode    ierr;
3374   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3375   IS                isrowa,iscola;
3376   Mat               *aloc;
3377 
3378   PetscFunctionBegin;
3379   if (!logkey_getlocalmatcondensed) {
3380     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3381   }
3382   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3383   if (!row){
3384     start = a->rstart; end = a->rend;
3385     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3386   } else {
3387     isrowa = *row;
3388   }
3389   if (!col){
3390     start = a->cstart;
3391     cmap  = a->garray;
3392     nzA   = a->A->n;
3393     nzB   = a->B->n;
3394     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3395     ncols = 0;
3396     for (i=0; i<nzB; i++) {
3397       if (cmap[i] < start) idx[ncols++] = cmap[i];
3398       else break;
3399     }
3400     imark = i;
3401     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3402     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3403     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3404     ierr = PetscFree(idx);CHKERRQ(ierr);
3405   } else {
3406     iscola = *col;
3407   }
3408   if (scall != MAT_INITIAL_MATRIX){
3409     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3410     aloc[0] = *A_loc;
3411   }
3412   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3413   *A_loc = aloc[0];
3414   ierr = PetscFree(aloc);CHKERRQ(ierr);
3415   if (!row){
3416     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3417   }
3418   if (!col){
3419     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3420   }
3421   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3422   PetscFunctionReturn(0);
3423 }
3424 
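/*
   Usage sketch (illustrative): extract the local rows of A restricted to the columns
   actually used on this process (the diagonal block columns plus the off-process
   columns referenced by the off-diagonal block); Aloc is hypothetical.

     Mat Aloc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
*/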
3425 static PetscEvent logkey_GetBrowsOfAcols = 0;
3426 #undef __FUNCT__
3427 #define __FUNCT__ "MatGetBrowsOfAcols"
3428 /*@C
3429     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3430 
3431     Collective on Mat
3432 
3433    Input Parameters:
3434 +    A,B - the matrices in mpiaij format
3435 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3436 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3437 
3438    Output Parameter:
3439 +    rowb, colb - index sets of rows and columns of B to extract
3440 .    brstart - row index in B_seq at which B's own local rows begin (the next B->m rows of B_seq come from B's local rows)
3441 -    B_seq - the sequential matrix generated
3442 
3443     Level: developer
3444 
3445 @*/
3446 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3447 {
3448   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3449   PetscErrorCode    ierr;
3450   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3451   IS                isrowb,iscolb;
3452   Mat               *bseq;
3453 
3454   PetscFunctionBegin;
3455   if (a->cstart != b->rstart || a->cend != b->rend){
3456     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3457   }
3458   if (!logkey_GetBrowsOfAcols) {
3459     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3460   }
3461   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3462 
3463   if (scall == MAT_INITIAL_MATRIX){
3464     start = a->cstart;
3465     cmap  = a->garray;
3466     nzA   = a->A->n;
3467     nzB   = a->B->n;
3468     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3469     ncols = 0;
3470     for (i=0; i<nzB; i++) {  /* row < local row index */
3471       if (cmap[i] < start) idx[ncols++] = cmap[i];
3472       else break;
3473     }
3474     imark = i;
3475     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3476     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3477     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3478     ierr = PetscFree(idx);CHKERRQ(ierr);
3479     *brstart = imark;
3480     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3481   } else {
3482     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3483     isrowb = *rowb; iscolb = *colb;
3484     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3485     bseq[0] = *B_seq;
3486   }
3487   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3488   *B_seq = bseq[0];
3489   ierr = PetscFree(bseq);CHKERRQ(ierr);
3490   if (!rowb){
3491     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3492   } else {
3493     *rowb = isrowb;
3494   }
3495   if (!colb){
3496     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3497   } else {
3498     *colb = iscolb;
3499   }
3500   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3501   PetscFunctionReturn(0);
3502 }
3503 
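/*
   Usage sketch (illustrative, as used by matrix-matrix product kernels): gather the
   rows of B that correspond to the nonzero columns of the local part of A into a
   sequential matrix; Bseq is hypothetical, and rowb/colb are not kept here.

     Mat      Bseq;
     PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&brstart,&Bseq);CHKERRQ(ierr);
*/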
3504 static PetscEvent logkey_GetBrowsOfAocols = 0;
3505 #undef __FUNCT__
3506 #define __FUNCT__ "MatGetBrowsOfAoCols"
3507 /*@C
3508     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3509     of the OFF-DIAGONAL portion of local A
3510 
3511     Collective on Mat
3512 
3513    Input Parameters:
3514 +    A,B - the matrices in mpiaij format
3515 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3516 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3517 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3518 
3519    Output Parameter:
3520 .    B_oth - the sequential matrix generated
3521 
3522     Level: developer
3523 
3524 @*/
3525 PetscErrorCode MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3526 {
3527   VecScatter_MPI_General *gen_to,*gen_from;
3528   PetscErrorCode         ierr;
3529   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3530   Mat_SeqAIJ             *b_oth;
3531   VecScatter             ctx=a->Mvctx;
3532   MPI_Comm               comm=ctx->comm;
3533   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3534   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3535   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3536   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3537   MPI_Request            *rwaits,*swaits;
3538   MPI_Status             *sstatus,rstatus;
3539   PetscInt               *cols;
3540   PetscScalar            *vals;
3541   PetscMPIInt            j;
3542 
3543   PetscFunctionBegin;
3544   if (a->cstart != b->rstart || a->cend != b->rend){
3545     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3546   }
3547   if (!logkey_GetBrowsOfAocols) {
3548     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3549   }
3550   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3551   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3552 
3553   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3554   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3555   rvalues  = gen_from->values; /* used to receive the lengths of the incoming rows */
3556   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3557   nrecvs   = gen_from->n;
3558   nsends   = gen_to->n;
3559   rwaits   = gen_from->requests;
3560   swaits   = gen_to->requests;
3561   srow     = gen_to->indices;   /* local row index to be sent */
3562   rstarts  = gen_from->starts;
3563   sstarts  = gen_to->starts;
3564   rprocs   = gen_from->procs;
3565   sprocs   = gen_to->procs;
3566   sstatus  = gen_to->sstatus;
3567 
3568   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3569   if (scall == MAT_INITIAL_MATRIX){
3570     /* i-array */
3571     /*---------*/
3572     /*  post receives */
3573     for (i=0; i<nrecvs; i++){
3574       rowlen = (PetscInt*)rvalues + rstarts[i];
3575       nrows = rstarts[i+1]-rstarts[i];
3576       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3577     }
3578 
3579     /* pack the outgoing message */
3580     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3581     rstartsj = sstartsj + nsends +1;
3582     sstartsj[0] = 0;  rstartsj[0] = 0;
3583     len = 0; /* total length of j or a array to be sent */
3584     k = 0;
3585     for (i=0; i<nsends; i++){
3586       rowlen = (PetscInt*)svalues + sstarts[i];
3587       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3588       for (j=0; j<nrows; j++) {
3589         row = srow[k] + b->rowners[rank]; /* global row idx */
3590         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3591         len += rowlen[j];
3592         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3593         k++;
3594       }
3595       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3596       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3597     }
3598     /* recvs and sends of i-array are completed */
3599     i = nrecvs;
3600     while (i--) {
3601       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3602     }
3603     if (nsends) {
3604       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3605     }
3606     /* allocate buffers for sending j and a arrays */
3607     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3608     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3609 
3610     /* create i-array of B_oth */
3611     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3612     b_othi[0] = 0;
3613     len = 0; /* total length of j or a array to be received */
3614     k = 0;
3615     for (i=0; i<nrecvs; i++){
3616       rowlen = (PetscInt*)rvalues + rstarts[i];
3617       nrows = rstarts[i+1]-rstarts[i];
3618       for (j=0; j<nrows; j++) {
3619         b_othi[k+1] = b_othi[k] + rowlen[j];
3620         len += rowlen[j]; k++;
3621       }
3622       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3623     }
3624 
3625     /* allocate space for j and a arrays of B_oth */
3626     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3627     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3628 
3629     /* j-array */
3630     /*---------*/
3631     /*  post receives of j-array */
3632     for (i=0; i<nrecvs; i++){
3633       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3634       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3635     }
3636     k = 0;
3637     for (i=0; i<nsends; i++){
3638       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3639       bufJ = bufj+sstartsj[i];
3640       for (j=0; j<nrows; j++) {
3641         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3642         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3643         for (l=0; l<ncols; l++){
3644           *bufJ++ = cols[l];
3645         }
3646         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3647       }
3648       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3649     }
3650 
3651     /* recvs and sends of j-array are completed */
3652     i = nrecvs;
3653     while (i--) {
3654       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3655     }
3656     if (nsends) {
3657       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3658     }
3659   } else if (scall == MAT_REUSE_MATRIX){
3660     sstartsj = *startsj;
3661     rstartsj = sstartsj + nsends +1;
3662     bufa     = *bufa_ptr;
3663     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3664     b_otha   = b_oth->a;
3665   } else {
3666     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3667   }
3668 
3669   /* a-array */
3670   /*---------*/
3671   /*  post receives of a-array */
3672   for (i=0; i<nrecvs; i++){
3673     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3674     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3675   }
3676   k = 0;
3677   for (i=0; i<nsends; i++){
3678     nrows = sstarts[i+1]-sstarts[i];
3679     bufA = bufa+sstartsj[i];
3680     for (j=0; j<nrows; j++) {
3681       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3682       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3683       for (l=0; l<ncols; l++){
3684         *bufA++ = vals[l];
3685       }
3686       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3687 
3688     }
3689     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3690   }
3691   /* recvs and sends of a-array are completed */
3692   i = nrecvs;
3693   while (i--) {
3694     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3695   }
3696   if (nsends) {
3697     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3698   }
3699 
3700   if (scall == MAT_INITIAL_MATRIX){
3701     /* put together the new matrix */
3702     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3703 
3704     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3705     /* Since these are PETSc arrays, change flags to free them as necessary. */
3706     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3707     b_oth->freedata = PETSC_TRUE;
3708     b_oth->nonew    = 0;
3709 
3710     ierr = PetscFree(bufj);CHKERRQ(ierr);
3711     if (!startsj || !bufa_ptr){
3712       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3713       ierr = PetscFree(bufa);CHKERRQ(ierr);
3714     } else {
3715       *startsj  = sstartsj;
3716       *bufa_ptr = bufa;
3717     }
3718   }
3719   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3720 
3721   PetscFunctionReturn(0);
3722 }
3723 
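/*
   Usage sketch (illustrative): gather the rows of B matching the off-diagonal columns
   of the local part of A; startsj and bufa are kept so a MAT_REUSE_MATRIX call can
   skip the symbolic setup.  The names Both, startsj and bufa are hypothetical.

     Mat         Both;
     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
     .. after the values of A and B change (same nonzero pattern) ..
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
*/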
3724 /*MC
3725    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3726 
3727    Options Database Keys:
3728 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3729 
3730   Level: beginner
3731 
3732 .seealso: MatCreateMPIAIJ
3733 M*/
3734 
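/*
   Usage sketch (illustrative): select this matrix type at run time; m,n,M,N are
   placeholder sizes.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     .. -mat_type mpiaij on the command line selects this implementation ..
*/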
3735 EXTERN_C_BEGIN
3736 #undef __FUNCT__
3737 #define __FUNCT__ "MatCreate_MPIAIJ"
3738 PetscErrorCode MatCreate_MPIAIJ(Mat B)
3739 {
3740   Mat_MPIAIJ     *b;
3741   PetscErrorCode ierr;
3742   PetscInt       i;
3743   PetscMPIInt    size;
3744 
3745   PetscFunctionBegin;
3746   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3747 
3748   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3749   B->data         = (void*)b;
3750   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3751   B->factor       = 0;
3752   B->bs           = 1;
3753   B->assembled    = PETSC_FALSE;
3754   B->mapping      = 0;
3755 
3756   B->insertmode      = NOT_SET_VALUES;
3757   b->size            = size;
3758   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3759 
3760   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3761   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3762 
3763   /* the information in the maps duplicates the information computed below, eventually
3764      we should remove the duplicate information that is not contained in the maps */
3765   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3766   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3767 
3768   /* build local table of row and column ownerships */
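  /* After the Allgather and prefix sums below, rowners[p] is the first global row owned
     by process p (cowners[] likewise for columns); this process owns rows
     rowners[rank] through rowners[rank+1]-1. */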
3769   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3770   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
3771   b->cowners = b->rowners + b->size + 2;
3772   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3773   b->rowners[0] = 0;
3774   for (i=2; i<=b->size; i++) {
3775     b->rowners[i] += b->rowners[i-1];
3776   }
3777   b->rstart = b->rowners[b->rank];
3778   b->rend   = b->rowners[b->rank+1];
3779   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3780   b->cowners[0] = 0;
3781   for (i=2; i<=b->size; i++) {
3782     b->cowners[i] += b->cowners[i-1];
3783   }
3784   b->cstart = b->cowners[b->rank];
3785   b->cend   = b->cowners[b->rank+1];
3786 
3787   /* build cache for off array entries formed */
3788   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3789   b->donotstash  = PETSC_FALSE;
3790   b->colmap      = 0;
3791   b->garray      = 0;
3792   b->roworiented = PETSC_TRUE;
3793 
3794   /* stuff used for matrix vector multiply */
3795   b->lvec      = PETSC_NULL;
3796   b->Mvctx     = PETSC_NULL;
3797 
3798   /* stuff for MatGetRow() */
3799   b->rowindices   = 0;
3800   b->rowvalues    = 0;
3801   b->getrowactive = PETSC_FALSE;
3802 
3803   /* Explicitly create 2 MATSEQAIJ matrices. */
3804   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3805   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3806   PetscLogObjectParent(B,b->A);
3807   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3808   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3809   PetscLogObjectParent(B,b->B);
3810 
3811   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3812                                      "MatStoreValues_MPIAIJ",
3813                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3814   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3815                                      "MatRetrieveValues_MPIAIJ",
3816                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3817   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3818 				     "MatGetDiagonalBlock_MPIAIJ",
3819                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3820   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3821 				     "MatIsTranspose_MPIAIJ",
3822 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3823   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3824 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3825 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3826   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3827 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3828 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3829   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3830 				     "MatDiagonalScaleLocal_MPIAIJ",
3831 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3832   PetscFunctionReturn(0);
3833 }
3834 EXTERN_C_END
3835