xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 76f2fa84772e8d0312027f1add11acf851899fc9)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it, it is not scalable (each processor
11 has an order N integer array) but is fast to access.
12 */
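/*
   Illustrative example (editorial sketch, not part of the original source): if the
   off-diagonal block B of this process references the global columns
   garray[] = {3, 17, 42}, then after CreateColmap_MPIAIJ_Private() a global column,
   say gcol = 17, is translated to its local position in B exactly as done in
   MatSetValues_MPIAIJ() below:

     PetscInt gcol = 17, lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   giving lcol = 1 here; a result of -1 means the global column does not occur in B.
*/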
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
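/*
   Worked example (illustrative only) of the insertion logic shared by the two
   macros above.  Suppose the row currently stores the sorted column indices
   rp1[] = {2,5,9} with nrow1 = 3, and the entry (row,7) is being set, new nonzeros
   being allowed and room being available (or obtained via MatSeqXAIJReallocateAIJ).
   The bounded binary search plus linear scan stops at _i = 2, the first entry
   greater than 7; the trailing entry 9 is shifted up one slot, rp1 becomes
   {2,5,7,9}, ap1[2] receives the value, and nrow1 and ailen[row] become 4.
*/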
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
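/*
   Illustrative note (editorial, not part of the original source):
   MatSetValuesRow_MPIAIJ() above expects v[] to hold the entire local row in
   global column order, i.e.

     v = [ B entries left of the diagonal block | all A (diagonal block) entries
           | remaining B entries ]

   which is why it first measures l, the number of off-diagonal entries whose
   global column lies to the left of the diagonal block, and then copies the
   three pieces with separate PetscMemcpy() calls.
*/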
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
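/*
   A hedged caller-side sketch (illustrative, not part of this file) of how the
   routine above is reached through the public interface; the matrix A and the
   index and value arrays are made up for the example:

     PetscInt    row     = 5;
     PetscInt    cols[3] = {4, 5, 6};
     PetscScalar vals[3] = {-1.0, 2.0, -1.0};
     ierr = MatSetValues(A,1,&row,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Rows owned by this process are routed directly into the diagonal (aij->A) or
   off-diagonal (aij->B) blocks above; rows owned elsewhere are stashed and
   delivered during the assembly calls (see MatAssemblyBegin/End_MPIAIJ below).
*/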
237 
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatGetValues_MPIAIJ"
241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
242 {
243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
244   PetscErrorCode ierr;
245   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
246   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
247 
248   PetscFunctionBegin;
249   for (i=0; i<m; i++) {
250     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
251     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
252     if (idxm[i] >= rstart && idxm[i] < rend) {
253       row = idxm[i] - rstart;
254       for (j=0; j<n; j++) {
255         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
256         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
257         if (idxn[j] >= cstart && idxn[j] < cend){
258           col = idxn[j] - cstart;
259           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
260         } else {
261           if (!aij->colmap) {
262             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
263           }
264 #if defined (PETSC_USE_CTABLE)
265           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
266           col --;
267 #else
268           col = aij->colmap[idxn[j]] - 1;
269 #endif
270           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
271           else {
272             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
273           }
274         }
275       }
276     } else {
277       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
278     }
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   if (aij->donotstash) {
294     PetscFunctionReturn(0);
295   }
296 
297   /* make sure all processors are either in INSERTMODE or ADDMODE */
298   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
299   if (addv == (ADD_VALUES|INSERT_VALUES)) {
300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
301   }
302   mat->insertmode = addv; /* in case this processor had no cache */
303 
304   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
305   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
306   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
313 {
314   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
315   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
316   PetscErrorCode ierr;
317   PetscMPIInt    n;
318   PetscInt       i,j,rstart,ncols,flg;
319   PetscInt       *row,*col,other_disassembled;
320   PetscScalar    *val;
321   InsertMode     addv = mat->insertmode;
322 
323   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324   PetscFunctionBegin;
325   if (!aij->donotstash) {
326     while (1) {
327       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
328       if (!flg) break;
329 
330       for (i=0; i<n;) {
331         /* Now identify the consecutive vals belonging to the same row */
332         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
333         if (j < n) ncols = j-i;
334         else       ncols = n-i;
335         /* Now assemble all these values with a single function call */
336         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
337         i = j;
338       }
339     }
340     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
341   }
342   a->compressedrow.use     = PETSC_FALSE;
343   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
345 
346   /* determine if any processor has disassembled, if so we must
347      also disassemble ourselves, in order that we may reassemble. */
348   /*
349      if nonzero structure of submatrix B cannot change then we know that
350      no processor disassembled thus we can skip this stuff
351   */
352   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
353     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
354     if (mat->was_assembled && !other_disassembled) {
355       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
356     }
357   }
358   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
359     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
360   }
361   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
362   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
363   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
364   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
365 
366   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
367   aij->rowvalues = 0;
368 
369   /* used by MatAXPY() */
370   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
371   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
372 
373   PetscFunctionReturn(0);
374 }
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
379 {
380   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
381   PetscErrorCode ierr;
382 
383   PetscFunctionBegin;
384   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
385   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatZeroRows_MPIAIJ"
391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
392 {
393   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
394   PetscErrorCode ierr;
395   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
396   PetscInt       i,*owners = A->rmap.range;
397   PetscInt       *nprocs,j,idx,nsends,row;
398   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
399   PetscInt       *rvalues,count,base,slen,*source;
400   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
401   MPI_Comm       comm = A->comm;
402   MPI_Request    *send_waits,*recv_waits;
403   MPI_Status     recv_status,*send_status;
404 #if defined(PETSC_DEBUG)
405   PetscTruth     found = PETSC_FALSE;
406 #endif
407 
408   PetscFunctionBegin;
409   /*  first count number of contributors to each processor */
410   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
411   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
412   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
413   j = 0;
414   for (i=0; i<N; i++) {
415     if (lastidx > (idx = rows[i])) j = 0;
416     lastidx = idx;
417     for (; j<size; j++) {
418       if (idx >= owners[j] && idx < owners[j+1]) {
419         nprocs[2*j]++;
420         nprocs[2*j+1] = 1;
421         owner[i] = j;
422 #if defined(PETSC_DEBUG)
423         found = PETSC_TRUE;
424 #endif
425         break;
426       }
427     }
428 #if defined(PETSC_DEBUG)
429     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
430     found = PETSC_FALSE;
431 #endif
432   }
433   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434 
435   /* inform other processors of number of messages and max length*/
436   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
437 
438   /* post receives:   */
439   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
440   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
441   for (i=0; i<nrecvs; i++) {
442     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
443   }
444 
445   /* do sends:
446       1) starts[i] gives the starting index in svalues for stuff going to
447          the ith processor
448   */
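  /*
     Illustrative example (editorial) with three processes: if 2, 0 and 3 rows are
     destined for processes 0, 1 and 2 respectively (nprocs[0], nprocs[2], nprocs[4]),
     then starts[] = {0, 2, 2}, so the rows for process 0 are packed at svalues[0..1]
     and the rows for process 2 at svalues[2..4].
  */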
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
450   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
451   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
452   starts[0] = 0;
453   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
454   for (i=0; i<N; i++) {
455     svalues[starts[owner[i]]++] = rows[i];
456   }
457 
458   starts[0] = 0;
459   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
460   count = 0;
461   for (i=0; i<size; i++) {
462     if (nprocs[2*i+1]) {
463       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
464     }
465   }
466   ierr = PetscFree(starts);CHKERRQ(ierr);
467 
468   base = owners[rank];
469 
470   /*  wait on receives */
471   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
472   source = lens + nrecvs;
473   count  = nrecvs; slen = 0;
474   while (count) {
475     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
476     /* unpack receives into our local space */
477     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
478     source[imdex]  = recv_status.MPI_SOURCE;
479     lens[imdex]    = n;
480     slen          += n;
481     count--;
482   }
483   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
484 
485   /* move the data into the send scatter */
486   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
487   count = 0;
488   for (i=0; i<nrecvs; i++) {
489     values = rvalues + i*nmax;
490     for (j=0; j<lens[i]; j++) {
491       lrows[count++] = values[j] - base;
492     }
493   }
494   ierr = PetscFree(rvalues);CHKERRQ(ierr);
495   ierr = PetscFree(lens);CHKERRQ(ierr);
496   ierr = PetscFree(owner);CHKERRQ(ierr);
497   ierr = PetscFree(nprocs);CHKERRQ(ierr);
498 
499   /* actually zap the local rows */
500   /*
501         Zero the required rows. If the "diagonal block" of the matrix
502      is square and the user wishes to set the diagonal we use separate
503      code so that MatSetValues() is not called for each diagonal allocating
504      new memory, thus calling lots of mallocs and slowing things down.
505 
506        Contributed by: Matthew Knepley
507   */
508   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
509   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
510   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
511     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
512   } else if (diag != 0.0) {
513     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
514     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
515       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
517     }
518     for (i = 0; i < slen; i++) {
519       row  = lrows[i] + rstart;
520       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
521     }
522     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
524   } else {
525     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
526   }
527   ierr = PetscFree(lrows);CHKERRQ(ierr);
528 
529   /* wait on sends */
530   if (nsends) {
531     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
532     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
533     ierr = PetscFree(send_status);CHKERRQ(ierr);
534   }
535   ierr = PetscFree(send_waits);CHKERRQ(ierr);
536   ierr = PetscFree(svalues);CHKERRQ(ierr);
537 
538   PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMult_MPIAIJ"
543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
544 {
545   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
546   PetscErrorCode ierr;
547   PetscInt       nt;
548 
549   PetscFunctionBegin;
550   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
551   if (nt != A->cmap.n) {
552     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
553   }
554   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
555   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
556   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
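/*
   In the notation used throughout this file, the multiply above computes
   (an illustrative restatement, not additional semantics):

     y = A_d * x_local + B * x_ghost

   where A_d = a->A is the diagonal block applied to the locally owned part of x,
   and B = a->B is applied to the ghost values gathered into a->lvec by the Mvctx
   scatter; the scatter is overlapped with the local product.
*/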
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultAdd_MPIAIJ"
563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567 
568   PetscFunctionBegin;
569   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
570   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
571   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
572   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
579 {
580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
581   PetscErrorCode ierr;
582   PetscTruth     merged;
583 
584   PetscFunctionBegin;
585   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
586   /* do nondiagonal part */
587   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
588   if (!merged) {
589     /* send it on its way */
590     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
591     /* do local part */
592     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
593     /* receive remote parts: note this assumes the values are not actually */
594     /* added into yy until the VecScatterEnd() below */
595     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
596   } else {
597     /* do local part */
598     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
599     /* send it on its way */
600     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
601     /* values actually were received in the Begin() but we need to call this nop */
602     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
611 {
612   MPI_Comm       comm;
613   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
614   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
615   IS             Me,Notme;
616   PetscErrorCode ierr;
617   PetscInt       M,N,first,last,*notme,i;
618   PetscMPIInt    size;
619 
620   PetscFunctionBegin;
621 
622   /* Easy test: symmetric diagonal block */
623   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
624   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
625   if (!*f) PetscFunctionReturn(0);
626   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
628   if (size == 1) PetscFunctionReturn(0);
629 
630   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
631   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
632   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
633   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
634   for (i=0; i<first; i++) notme[i] = i;
635   for (i=last; i<M; i++) notme[i-last+first] = i;
636   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
637   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
638   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
639   Aoff = Aoffs[0];
640   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
641   Boff = Boffs[0];
642   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
644   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
645   ierr = ISDestroy(Me);CHKERRQ(ierr);
646   ierr = ISDestroy(Notme);CHKERRQ(ierr);
647 
648   PetscFunctionReturn(0);
649 }
650 EXTERN_C_END
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
655 {
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657   PetscErrorCode ierr;
658 
659   PetscFunctionBegin;
660   /* do nondiagonal part */
661   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
662   /* send it on its way */
663   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
664   /* do local part */
665   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
666   /* receive remote parts */
667   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672   This only works correctly for square matrices where the subblock A->A is the
673    diagonal block
674 */
675 #undef __FUNCT__
676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
678 {
679   PetscErrorCode ierr;
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681 
682   PetscFunctionBegin;
683   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
684   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
685     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
686   }
687   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatScale_MPIAIJ"
693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
694 {
695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
700   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatDestroy_MPIAIJ"
706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
707 {
708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
709   PetscErrorCode ierr;
710 
711   PetscFunctionBegin;
712 #if defined(PETSC_USE_LOG)
713   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
714 #endif
715   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
717   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
718 #if defined (PETSC_USE_CTABLE)
719   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
720 #else
721   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
722 #endif
723   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
724   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
725   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
726   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
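/*
   Summary (editorial, derived from the code above) of the binary layout written
   by MatView_MPIAIJ_Binary():

     header[4]        = { MAT_FILE_COOKIE, global rows, global cols, global nz }
     row_lengths[]    = nonzeros per row, all rows in global order
     column_indices[] = global column indices, row by row, each row sorted
     column_values[]  = the corresponding numerical values

   Process 0 performs all the writing; the other processes ship their pieces to it
   with MPI_Send/MPI_Recv using the viewer's tag.
*/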
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869 {
870   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,iascii,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (iascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo    info;
885       PetscTruth inodes;
886 
887       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
888       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
890       if (!inodes) {
891         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
893       } else {
894         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
896       }
897       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
899       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
901       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
902       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
903       PetscFunctionReturn(0);
904     } else if (format == PETSC_VIEWER_ASCII_INFO) {
905       PetscInt   inodecount,inodelimit,*inodes;
906       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
907       if (inodes) {
908         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
909       } else {
910         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
911       }
912       PetscFunctionReturn(0);
913     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
914       PetscFunctionReturn(0);
915     }
916   } else if (isbinary) {
917     if (size == 1) {
918       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
919       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
920     } else {
921       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
922     }
923     PetscFunctionReturn(0);
924   } else if (isdraw) {
925     PetscDraw  draw;
926     PetscTruth isnull;
927     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
928     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
929   }
930 
931   if (size == 1) {
932     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
933     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
934   } else {
935     /* assemble the entire matrix onto first processor. */
936     Mat         A;
937     Mat_SeqAIJ  *Aloc;
938     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
939     PetscScalar *a;
940 
941     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
942     if (!rank) {
943       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
944     } else {
945       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
946     }
947     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
948     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
949     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
950     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
951 
952     /* copy over the A part */
953     Aloc = (Mat_SeqAIJ*)aij->A->data;
954     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
955     row = mat->rmap.rstart;
956     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
957     for (i=0; i<m; i++) {
958       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
959       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
960     }
961     aj = Aloc->j;
962     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
963 
964     /* copy over the B part */
965     Aloc = (Mat_SeqAIJ*)aij->B->data;
966     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
967     row  = mat->rmap.rstart;
968     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
969     ct   = cols;
970     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
971     for (i=0; i<m; i++) {
972       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
973       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
974     }
975     ierr = PetscFree(ct);CHKERRQ(ierr);
976     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     /*
979        Everyone has to call to draw the matrix since the graphics waits are
980        synchronized across all processors that share the PetscDraw object
981     */
982     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
983     if (!rank) {
984       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
985       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
986     }
987     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
988     ierr = MatDestroy(A);CHKERRQ(ierr);
989   }
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatView_MPIAIJ"
995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
996 {
997   PetscErrorCode ierr;
998   PetscTruth     iascii,isdraw,issocket,isbinary;
999 
1000   PetscFunctionBegin;
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1005   if (iascii || isdraw || isbinary || issocket) {
1006     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1007   } else {
1008     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1009   }
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatRelax_MPIAIJ"
1017 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1018 {
1019   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1020   PetscErrorCode ierr;
1021   Vec            bb1;
1022 
1023   PetscFunctionBegin;
1024   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1025 
1026   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1027 
1028   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1029     if (flag & SOR_ZERO_INITIAL_GUESS) {
1030       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1031       its--;
1032     }
1033 
1034     while (its--) {
1035       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1036       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1037 
1038       /* update rhs: bb1 = bb - B*x */
1039       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1040       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1041 
1042       /* local sweep */
1043       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1044       CHKERRQ(ierr);
1045     }
1046   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1047     if (flag & SOR_ZERO_INITIAL_GUESS) {
1048       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1049       its--;
1050     }
1051     while (its--) {
1052       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1054 
1055       /* update rhs: bb1 = bb - B*x */
1056       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1057       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1058 
1059       /* local sweep */
1060       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1061       CHKERRQ(ierr);
1062     }
1063   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1064     if (flag & SOR_ZERO_INITIAL_GUESS) {
1065       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1066       its--;
1067     }
1068     while (its--) {
1069       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1071 
1072       /* update rhs: bb1 = bb - B*x */
1073       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1074       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1075 
1076       /* local sweep */
1077       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1078       CHKERRQ(ierr);
1079     }
1080   } else {
1081     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1082   }
1083 
1084   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
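/*
   A sketch (editorial restatement) of one outer iteration of the SOR_LOCAL_*
   variants above, writing the matrix as A = A_d + B with A_d = mat->A the local
   diagonal block and B = mat->B the off-process coupling:

     gather ghost values:   xg  <- x            (Mvctx scatter into mat->lvec)
     update the rhs:        bb1 <- bb - B*xg    (VecScale by -1 then MatMultAdd)
     local sweep:           x   <- SOR(A_d, bb1, omega, fshift, lits)

   Only the local sweeps differ between the forward, backward and symmetric cases.
*/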
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatPermute_MPIAIJ"
1090 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1091 {
1092   MPI_Comm       comm,pcomm;
1093   PetscInt       first,local_size,nrows,*rows;
1094   int            ntids;
1095   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1100   /* make a collective version of 'rowp' */
1101   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1102   if (pcomm==comm) {
1103     crowp = rowp;
1104   } else {
1105     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1106     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1107     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1108     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1109   }
1110   /* collect the global row permutation and invert it */
1111   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1112   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1113   if (pcomm!=comm) {
1114     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1115   }
1116   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1117   /* get the local target indices */
1118   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1120   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1121   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1122   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1123   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1124   /* the column permutation is so much easier;
1125      make a local version of 'colp' and invert it */
1126   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1127   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1128   if (ntids==1) {
1129     lcolp = colp;
1130   } else {
1131     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1132     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1133     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1134   }
1135   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1136   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1137   if (ntids>1) {
1138     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1139     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1140   }
1141   /* now we just get the submatrix */
1142   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1143   /* clean up */
1144   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1145   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1151 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1152 {
1153   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1154   Mat            A = mat->A,B = mat->B;
1155   PetscErrorCode ierr;
1156   PetscReal      isend[5],irecv[5];
1157 
1158   PetscFunctionBegin;
1159   info->block_size     = 1.0;
1160   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1161   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1162   isend[3] = info->memory;  isend[4] = info->mallocs;
1163   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1164   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1165   isend[3] += info->memory;  isend[4] += info->mallocs;
1166   if (flag == MAT_LOCAL) {
1167     info->nz_used      = isend[0];
1168     info->nz_allocated = isend[1];
1169     info->nz_unneeded  = isend[2];
1170     info->memory       = isend[3];
1171     info->mallocs      = isend[4];
1172   } else if (flag == MAT_GLOBAL_MAX) {
1173     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1174     info->nz_used      = irecv[0];
1175     info->nz_allocated = irecv[1];
1176     info->nz_unneeded  = irecv[2];
1177     info->memory       = irecv[3];
1178     info->mallocs      = irecv[4];
1179   } else if (flag == MAT_GLOBAL_SUM) {
1180     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1181     info->nz_used      = irecv[0];
1182     info->nz_allocated = irecv[1];
1183     info->nz_unneeded  = irecv[2];
1184     info->memory       = irecv[3];
1185     info->mallocs      = irecv[4];
1186   }
1187   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1188   info->fill_ratio_needed = 0;
1189   info->factor_mallocs    = 0;
1190   info->rows_global       = (double)matin->rmap.N;
1191   info->columns_global    = (double)matin->cmap.N;
1192   info->rows_local        = (double)matin->rmap.n;
1193   info->columns_local     = (double)matin->cmap.N;
1194 
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatSetOption_MPIAIJ"
1200 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1201 {
1202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   switch (op) {
1207   case MAT_NO_NEW_NONZERO_LOCATIONS:
1208   case MAT_YES_NEW_NONZERO_LOCATIONS:
1209   case MAT_COLUMNS_UNSORTED:
1210   case MAT_COLUMNS_SORTED:
1211   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1212   case MAT_KEEP_ZEROED_ROWS:
1213   case MAT_NEW_NONZERO_LOCATION_ERR:
1214   case MAT_USE_INODES:
1215   case MAT_DO_NOT_USE_INODES:
1216   case MAT_IGNORE_ZERO_ENTRIES:
1217     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1218     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1219     break;
1220   case MAT_ROW_ORIENTED:
1221     a->roworiented = PETSC_TRUE;
1222     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1223     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1224     break;
1225   case MAT_ROWS_SORTED:
1226   case MAT_ROWS_UNSORTED:
1227   case MAT_YES_NEW_DIAGONALS:
1228     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1229     break;
1230   case MAT_COLUMN_ORIENTED:
1231     a->roworiented = PETSC_FALSE;
1232     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1233     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1234     break;
1235   case MAT_IGNORE_OFF_PROC_ENTRIES:
1236     a->donotstash = PETSC_TRUE;
1237     break;
1238   case MAT_NO_NEW_DIAGONALS:
1239     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1240   case MAT_SYMMETRIC:
1241     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1242     break;
1243   case MAT_STRUCTURALLY_SYMMETRIC:
1244   case MAT_HERMITIAN:
1245   case MAT_SYMMETRY_ETERNAL:
1246     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1247     break;
1248   case MAT_NOT_SYMMETRIC:
1249   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1250   case MAT_NOT_HERMITIAN:
1251   case MAT_NOT_SYMMETRY_ETERNAL:
1252     break;
1253   default:
1254     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatGetRow_MPIAIJ"
1261 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1262 {
1263   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1264   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1265   PetscErrorCode ierr;
1266   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1267   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1268   PetscInt       *cmap,*idx_p;
1269 
1270   PetscFunctionBegin;
1271   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1272   mat->getrowactive = PETSC_TRUE;
1273 
1274   if (!mat->rowvalues && (idx || v)) {
1275     /*
1276         allocate enough space to hold information from the longest row.
1277     */
1278     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1279     PetscInt     max = 1,tmp;
1280     for (i=0; i<matin->rmap.n; i++) {
1281       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1282       if (max < tmp) { max = tmp; }
1283     }
1284     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1285     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1286   }
1287 
1288   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1289   lrow = row - rstart;
1290 
1291   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1292   if (!v)   {pvA = 0; pvB = 0;}
1293   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1294   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1295   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1296   nztot = nzA + nzB;
1297 
1298   cmap  = mat->garray;
1299   if (v  || idx) {
1300     if (nztot) {
1301       /* Sort by increasing column numbers, assuming A and B already sorted */
1302       PetscInt imark = -1;
1303       if (v) {
1304         *v = v_p = mat->rowvalues;
1305         for (i=0; i<nzB; i++) {
1306           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1307           else break;
1308         }
1309         imark = i;
1310         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1311         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1312       }
1313       if (idx) {
1314         *idx = idx_p = mat->rowindices;
1315         if (imark > -1) {
1316           for (i=0; i<imark; i++) {
1317             idx_p[i] = cmap[cworkB[i]];
1318           }
1319         } else {
1320           for (i=0; i<nzB; i++) {
1321             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1322             else break;
1323           }
1324           imark = i;
1325         }
1326         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1327         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1328       }
1329     } else {
1330       if (idx) *idx = 0;
1331       if (v)   *v   = 0;
1332     }
1333   }
1334   *nz = nztot;
1335   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1336   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
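/*
   Illustrative example (not part of the original source) of the merge performed in
   MatGetRow_MPIAIJ() above.  Suppose on this process cstart = 10, the diagonal
   block A contributes local columns {0,3} (global {10,13}) and the off-diagonal
   block B contributes columns whose global numbers (via garray) are {2,27}.
   The returned row is then ordered by global column:

     idx = { 2, 10, 13, 27 }

   that is, B entries below cstart, then A entries shifted by cstart, then the
   remaining B entries.
*/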
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1342 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1343 {
1344   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1345 
1346   PetscFunctionBegin;
1347   if (!aij->getrowactive) {
1348     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1349   }
1350   aij->getrowactive = PETSC_FALSE;
1351   PetscFunctionReturn(0);
1352 }
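
/*
   Illustrative sketch (not part of the library): looping over the locally owned rows of an
   assembled MPIAIJ matrix with MatGetRow()/MatRestoreRow(). As the routines above enforce,
   only rows in the local ownership range may be requested, and each MatGetRow() must be
   paired with a MatRestoreRow() before the next row is fetched. The function name is
   hypothetical; the const qualifiers follow the public MatGetRow() prototype and may need
   adjusting for other PETSc versions.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleLoopOverLocalRows"
static PetscErrorCode ExampleLoopOverLocalRows(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* cols[]/vals[] hold the global column indices and values of this row, merged from
       the diagonal and off-diagonal blocks in increasing column order */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}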
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatNorm_MPIAIJ"
1356 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1357 {
1358   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1359   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1360   PetscErrorCode ierr;
1361   PetscInt       i,j,cstart = mat->cmap.rstart;
1362   PetscReal      sum = 0.0;
1363   PetscScalar    *v;
1364 
1365   PetscFunctionBegin;
1366   if (aij->size == 1) {
1367     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1368   } else {
1369     if (type == NORM_FROBENIUS) {
1370       v = amat->a;
1371       for (i=0; i<amat->nz; i++) {
1372 #if defined(PETSC_USE_COMPLEX)
1373         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1374 #else
1375         sum += (*v)*(*v); v++;
1376 #endif
1377       }
1378       v = bmat->a;
1379       for (i=0; i<bmat->nz; i++) {
1380 #if defined(PETSC_USE_COMPLEX)
1381         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1382 #else
1383         sum += (*v)*(*v); v++;
1384 #endif
1385       }
1386       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1387       *norm = sqrt(*norm);
1388     } else if (type == NORM_1) { /* max column norm */
1389       PetscReal *tmp,*tmp2;
1390       PetscInt    *jj,*garray = aij->garray;
1391       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1392       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1393       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1394       *norm = 0.0;
1395       v = amat->a; jj = amat->j;
1396       for (j=0; j<amat->nz; j++) {
1397         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1398       }
1399       v = bmat->a; jj = bmat->j;
1400       for (j=0; j<bmat->nz; j++) {
1401         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1402       }
1403       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1404       for (j=0; j<mat->cmap.N; j++) {
1405         if (tmp2[j] > *norm) *norm = tmp2[j];
1406       }
1407       ierr = PetscFree(tmp);CHKERRQ(ierr);
1408       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1409     } else if (type == NORM_INFINITY) { /* max row norm */
1410       PetscReal ntemp = 0.0;
1411       for (j=0; j<aij->A->rmap.n; j++) {
1412         v = amat->a + amat->i[j];
1413         sum = 0.0;
1414         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1415           sum += PetscAbsScalar(*v); v++;
1416         }
1417         v = bmat->a + bmat->i[j];
1418         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1419           sum += PetscAbsScalar(*v); v++;
1420         }
1421         if (sum > ntemp) ntemp = sum;
1422       }
1423       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1424     } else {
1425       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1426     }
1427   }
1428   PetscFunctionReturn(0);
1429 }
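
/*
   Illustrative sketch (not part of the library): querying the norms computed by the routine
   above. NORM_1 is the maximum column sum, NORM_INFINITY the maximum row sum, and
   NORM_FROBENIUS the square root of the sum of squares of all entries; the 2-norm is not
   supported. The function name is hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "ExamplePrintNorms"
static PetscErrorCode ExamplePrintNorms(Mat A)
{
  PetscErrorCode ierr;
  PetscReal      nrm1,nrmf,nrminf;

  PetscFunctionBegin;
  ierr = MatNorm(A,NORM_1,&nrm1);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_FROBENIUS,&nrmf);CHKERRQ(ierr);
  ierr = MatNorm(A,NORM_INFINITY,&nrminf);CHKERRQ(ierr);
  ierr = PetscPrintf(A->comm,"1-norm %g  Frobenius norm %g  infinity norm %g\n",(double)nrm1,(double)nrmf,(double)nrminf);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}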
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatTranspose_MPIAIJ"
1433 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1434 {
1435   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1436   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1437   PetscErrorCode ierr;
1438   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1439   Mat            B;
1440   PetscScalar    *array;
1441 
1442   PetscFunctionBegin;
1443   if (!matout && M != N) {
1444     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1445   }
1446 
1447   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1448   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1449   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1450   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1451 
1452   /* copy over the A part */
1453   Aloc = (Mat_SeqAIJ*)a->A->data;
1454   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1455   row = A->rmap.rstart;
1456   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
1457   for (i=0; i<m; i++) {
1458     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1459     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1460   }
1461   aj = Aloc->j;
1462   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}
1463 
1464   /* copy over the B part */
1465   Aloc = (Mat_SeqAIJ*)a->B->data;
1466   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1467   row  = A->rmap.rstart;
1468   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1469   ct   = cols;
1470   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1471   for (i=0; i<m; i++) {
1472     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1473     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1474   }
1475   ierr = PetscFree(ct);CHKERRQ(ierr);
1476   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1477   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1478   if (matout) {
1479     *matout = B;
1480   } else {
1481     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1482   }
1483   PetscFunctionReturn(0);
1484 }
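
/*
   Illustrative sketch (not part of the library): transposing an MPIAIJ matrix. Passing a
   non-NULL output pointer creates a new matrix; as the routine above checks, passing
   PETSC_NULL transposes in place and is only allowed for square matrices. The function
   name is hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleTranspose"
static PetscErrorCode ExampleTranspose(Mat A,Mat *At)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* out-of-place: *At is a newly created matrix holding the transpose of A;
     MatTranspose(A,PETSC_NULL) would instead transpose a square A in place */
  ierr = MatTranspose(A,At);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}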
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1488 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1489 {
1490   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1491   Mat            a = aij->A,b = aij->B;
1492   PetscErrorCode ierr;
1493   PetscInt       s1,s2,s3;
1494 
1495   PetscFunctionBegin;
1496   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1497   if (rr) {
1498     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1499     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1500     /* Overlap communication with computation. */
1501     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1502   }
1503   if (ll) {
1504     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1505     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1506     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1507   }
1508   /* scale  the diagonal block */
1509   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1510 
1511   if (rr) {
1512     /* Do a scatter end and then right scale the off-diagonal block */
1513     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1514     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1515   }
1516 
1517   PetscFunctionReturn(0);
1518 }
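
/*
   Illustrative sketch (not part of the library): forming L*A*R with MatDiagonalScale().
   The left vector must have the same local length as the local rows of A and the right
   vector the same local length as the local columns, matching the checks in the routine
   above. The function name and the chosen scaling values are hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleDiagonalScale"
static PetscErrorCode ExampleDiagonalScale(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       m,n,M,N;
  Vec            l,r;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = VecCreateMPI(A->comm,m,M,&l);CHKERRQ(ierr);
  ierr = VecCreateMPI(A->comm,n,N,&r);CHKERRQ(ierr);
  ierr = VecSet(l,2.0);CHKERRQ(ierr);            /* scale every row by 2 */
  ierr = VecSet(r,0.5);CHKERRQ(ierr);            /* scale every column by 1/2 */
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
  ierr = VecDestroy(l);CHKERRQ(ierr);
  ierr = VecDestroy(r);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}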
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1522 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1523 {
1524   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1529   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1530   PetscFunctionReturn(0);
1531 }
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1534 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1535 {
1536   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1537   PetscErrorCode ierr;
1538 
1539   PetscFunctionBegin;
1540   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatEqual_MPIAIJ"
1546 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1547 {
1548   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1549   Mat            a,b,c,d;
1550   PetscTruth     flg;
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   a = matA->A; b = matA->B;
1555   c = matB->A; d = matB->B;
1556 
1557   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1558   if (flg) {
1559     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1560   }
1561   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatCopy_MPIAIJ"
1567 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1568 {
1569   PetscErrorCode ierr;
1570   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1571   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1572 
1573   PetscFunctionBegin;
1574   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1575   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1576     /* because of the column compression in the off-processor part of the matrix a->B,
1577        the number of columns in a->B and b->B may be different, hence we cannot call
1578        MatCopy() directly on the two parts. If need be, we can provide a more
1579        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1580        and then copying the submatrices */
1581     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1582   } else {
1583     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1584     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1585   }
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1591 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1592 {
1593   PetscErrorCode ierr;
1594 
1595   PetscFunctionBegin;
1596   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #include "petscblaslapack.h"
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatAXPY_MPIAIJ"
1603 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1604 {
1605   PetscErrorCode ierr;
1606   PetscInt       i;
1607   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1608   PetscBLASInt   bnz,one=1;
1609   Mat_SeqAIJ     *x,*y;
1610 
1611   PetscFunctionBegin;
1612   if (str == SAME_NONZERO_PATTERN) {
1613     PetscScalar alpha = a;
1614     x = (Mat_SeqAIJ *)xx->A->data;
1615     y = (Mat_SeqAIJ *)yy->A->data;
1616     bnz = (PetscBLASInt)x->nz;
1617     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1618     x = (Mat_SeqAIJ *)xx->B->data;
1619     y = (Mat_SeqAIJ *)yy->B->data;
1620     bnz = (PetscBLASInt)x->nz;
1621     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1622   } else if (str == SUBSET_NONZERO_PATTERN) {
1623     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1624 
1625     x = (Mat_SeqAIJ *)xx->B->data;
1626     y = (Mat_SeqAIJ *)yy->B->data;
1627     if (y->xtoy && y->XtoY != xx->B) {
1628       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1629       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1630     }
1631     if (!y->xtoy) { /* get xtoy */
1632       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1633       y->XtoY = xx->B;
1634     }
1635     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1636   } else {
1637     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1638   }
1639   PetscFunctionReturn(0);
1640 }
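
/*
   Illustrative sketch (not part of the library): computing Y = Y + a*X with MatAXPY().
   The MatStructure flag selects the path taken by the routine above: SAME_NONZERO_PATTERN
   uses a direct BLAS axpy on the stored values, SUBSET_NONZERO_PATTERN builds (and caches)
   an index map from X into Y, and any other flag falls back to MatAXPY_Basic(). The
   function name is hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleAXPY"
static PetscErrorCode ExampleAXPY(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Y and X must have identical layouts; here X is assumed to have the same nonzero pattern as Y */
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}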
1641 
1642 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1643 
1644 #undef __FUNCT__
1645 #define __FUNCT__ "MatConjugate_MPIAIJ"
1646 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1647 {
1648 #if defined(PETSC_USE_COMPLEX)
1649   PetscErrorCode ierr;
1650   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1651 
1652   PetscFunctionBegin;
1653   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1654   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1655 #else
1656   PetscFunctionBegin;
1657 #endif
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatRealPart_MPIAIJ"
1663 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1664 {
1665   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1666   PetscErrorCode ierr;
1667 
1668   PetscFunctionBegin;
1669   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1670   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1676 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1677 {
1678   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1679   PetscErrorCode ierr;
1680 
1681   PetscFunctionBegin;
1682   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1683   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 #ifdef PETSC_HAVE_PBGL
1688 
1689 #include <boost/parallel/mpi/bsp_process_group.hpp>
1690 #include <boost/graph/distributed/ilu_default_graph.hpp>
1691 #include <boost/graph/distributed/ilu_0_block.hpp>
1692 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1693 #include <boost/graph/distributed/petsc/interface.hpp>
1694 #include <boost/multi_array.hpp>
1695 #include <boost/parallel/distributed_property_map.hpp>
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1699 /*
1700   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1701 */
1702 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1703 {
1704   namespace petsc = boost::distributed::petsc;
1705 
1706   namespace graph_dist = boost::graph::distributed;
1707   using boost::graph::distributed::ilu_default::process_group_type;
1708   using boost::graph::ilu_permuted;
1709 
1710   PetscTruth      row_identity, col_identity;
1711   PetscContainer  c;
1712   PetscInt        m, n, M, N;
1713   PetscErrorCode  ierr;
1714 
1715   PetscFunctionBegin;
1716   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1717   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1718   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1719   if (!row_identity || !col_identity) {
1720     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1721   }
1722 
1723   process_group_type pg;
1724   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1725   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1726   lgraph_type&   level_graph = *lgraph_p;
1727   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1728 
1729   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1730   ilu_permuted(level_graph);
1731 
1732   /* put together the new matrix */
1733   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1734   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1735   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1736   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1737   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1738   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1739   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1740   (*fact)->factor = FACTOR_LU;
1741 
1742   ierr = PetscContainerCreate(A->comm, &c);CHKERRQ(ierr);
1743   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
1744   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);CHKERRQ(ierr);
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1750 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1751 {
1752   PetscFunctionBegin;
1753   PetscFunctionReturn(0);
1754 }
1755 
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatSolve_MPIAIJ"
1758 /*
1759   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1760 */
1761 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1762 {
1763   namespace graph_dist = boost::graph::distributed;
1764 
1765   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1766   lgraph_type*   lgraph_p;
1767   PetscContainer c;
1768   PetscErrorCode ierr;
1769 
1770   PetscFunctionBegin;
1771   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1772   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1773   ierr = VecCopy(b, x); CHKERRQ(ierr);
1774 
1775   PetscScalar* array_x;
1776   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1777   PetscInt sx;
1778   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1779 
1780   PetscScalar* array_b;
1781   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1782   PetscInt sb;
1783   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1784 
1785   lgraph_type&   level_graph = *lgraph_p;
1786   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1787 
1788   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1789   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1790                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1791 
1792   typedef boost::iterator_property_map<array_ref_type::iterator,
1793                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1794   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1795                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1796 
1797   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1798 
1799   PetscFunctionReturn(0);
1800 }
1801 #endif
1802 
1803 /* -------------------------------------------------------------------*/
1804 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1805        MatGetRow_MPIAIJ,
1806        MatRestoreRow_MPIAIJ,
1807        MatMult_MPIAIJ,
1808 /* 4*/ MatMultAdd_MPIAIJ,
1809        MatMultTranspose_MPIAIJ,
1810        MatMultTransposeAdd_MPIAIJ,
1811 #ifdef PETSC_HAVE_PBGL
1812        MatSolve_MPIAIJ,
1813 #else
1814        0,
1815 #endif
1816        0,
1817        0,
1818 /*10*/ 0,
1819        0,
1820        0,
1821        MatRelax_MPIAIJ,
1822        MatTranspose_MPIAIJ,
1823 /*15*/ MatGetInfo_MPIAIJ,
1824        MatEqual_MPIAIJ,
1825        MatGetDiagonal_MPIAIJ,
1826        MatDiagonalScale_MPIAIJ,
1827        MatNorm_MPIAIJ,
1828 /*20*/ MatAssemblyBegin_MPIAIJ,
1829        MatAssemblyEnd_MPIAIJ,
1830        0,
1831        MatSetOption_MPIAIJ,
1832        MatZeroEntries_MPIAIJ,
1833 /*25*/ MatZeroRows_MPIAIJ,
1834        0,
1835 #ifdef PETSC_HAVE_PBGL
1836        MatLUFactorNumeric_MPIAIJ,
1837 #else
1838        0,
1839 #endif
1840        0,
1841        0,
1842 /*30*/ MatSetUpPreallocation_MPIAIJ,
1843 #ifdef PETSC_HAVE_PBGL
1844        MatILUFactorSymbolic_MPIAIJ,
1845 #else
1846        0,
1847 #endif
1848        0,
1849        0,
1850        0,
1851 /*35*/ MatDuplicate_MPIAIJ,
1852        0,
1853        0,
1854        0,
1855        0,
1856 /*40*/ MatAXPY_MPIAIJ,
1857        MatGetSubMatrices_MPIAIJ,
1858        MatIncreaseOverlap_MPIAIJ,
1859        MatGetValues_MPIAIJ,
1860        MatCopy_MPIAIJ,
1861 /*45*/ 0,
1862        MatScale_MPIAIJ,
1863        0,
1864        0,
1865        0,
1866 /*50*/ MatSetBlockSize_MPIAIJ,
1867        0,
1868        0,
1869        0,
1870        0,
1871 /*55*/ MatFDColoringCreate_MPIAIJ,
1872        0,
1873        MatSetUnfactored_MPIAIJ,
1874        MatPermute_MPIAIJ,
1875        0,
1876 /*60*/ MatGetSubMatrix_MPIAIJ,
1877        MatDestroy_MPIAIJ,
1878        MatView_MPIAIJ,
1879        0,
1880        0,
1881 /*65*/ 0,
1882        0,
1883        0,
1884        0,
1885        0,
1886 /*70*/ 0,
1887        0,
1888        MatSetColoring_MPIAIJ,
1889 #if defined(PETSC_HAVE_ADIC)
1890        MatSetValuesAdic_MPIAIJ,
1891 #else
1892        0,
1893 #endif
1894        MatSetValuesAdifor_MPIAIJ,
1895 /*75*/ 0,
1896        0,
1897        0,
1898        0,
1899        0,
1900 /*80*/ 0,
1901        0,
1902        0,
1903        0,
1904 /*84*/ MatLoad_MPIAIJ,
1905        0,
1906        0,
1907        0,
1908        0,
1909        0,
1910 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1911        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1912        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1913        MatPtAP_Basic,
1914        MatPtAPSymbolic_MPIAIJ,
1915 /*95*/ MatPtAPNumeric_MPIAIJ,
1916        0,
1917        0,
1918        0,
1919        0,
1920 /*100*/0,
1921        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1922        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1923        MatConjugate_MPIAIJ,
1924        0,
1925 /*105*/MatSetValuesRow_MPIAIJ,
1926        MatRealPart_MPIAIJ,
1927        MatImaginaryPart_MPIAIJ};
1928 
1929 /* ----------------------------------------------------------------------------------------*/
1930 
1931 EXTERN_C_BEGIN
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1934 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1935 {
1936   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1937   PetscErrorCode ierr;
1938 
1939   PetscFunctionBegin;
1940   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1941   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 EXTERN_C_END
1945 
1946 EXTERN_C_BEGIN
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1949 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1950 {
1951   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1952   PetscErrorCode ierr;
1953 
1954   PetscFunctionBegin;
1955   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1956   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1957   PetscFunctionReturn(0);
1958 }
1959 EXTERN_C_END
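
/*
   Illustrative sketch (not part of the library): reusing a nonzero pattern across repeated
   assemblies with MatStoreValues()/MatRetrieveValues(), which the two routines above apply
   blockwise to the diagonal and off-diagonal parts. The nonzero structure must be frozen
   first; the option name follows the MatSetOption() calling sequence of this PETSc version
   and may differ in other releases. The function name is hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleStoreRetrieve"
static PetscErrorCode ExampleStoreRetrieve(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr); /* freeze the nonzero pattern */
  ierr = MatStoreValues(A);CHKERRQ(ierr);       /* save the current numerical values */
  /* ... the values may now be overwritten, e.g. by an in-place factorization ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr);    /* restore the saved values */
  PetscFunctionReturn(0);
}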
1960 
1961 #include "petscpc.h"
1962 EXTERN_C_BEGIN
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1965 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1966 {
1967   Mat_MPIAIJ     *b;
1968   PetscErrorCode ierr;
1969   PetscInt       i;
1970 
1971   PetscFunctionBegin;
1972   B->preallocated = PETSC_TRUE;
1973   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1974   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1975   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1976   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1977 
1978   B->rmap.bs = B->cmap.bs = 1;
1979   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1980   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1981   if (d_nnz) {
1982     for (i=0; i<B->rmap.n; i++) {
1983       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1984     }
1985   }
1986   if (o_nnz) {
1987     for (i=0; i<B->rmap.n; i++) {
1988       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1989     }
1990   }
1991   b = (Mat_MPIAIJ*)B->data;
1992 
1993   /* Explicitly create 2 MATSEQAIJ matrices. */
1994   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1995   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1996   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1997   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1998   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1999   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2000   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2001   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2002 
2003   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2004   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2005 
2006   PetscFunctionReturn(0);
2007 }
2008 EXTERN_C_END
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2012 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2013 {
2014   Mat            mat;
2015   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   *newmat       = 0;
2020   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2021   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2022   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2023   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2024   a    = (Mat_MPIAIJ*)mat->data;
2025 
2026   mat->factor       = matin->factor;
2027   mat->rmap.bs      = matin->rmap.bs;
2028   mat->assembled    = PETSC_TRUE;
2029   mat->insertmode   = NOT_SET_VALUES;
2030   mat->preallocated = PETSC_TRUE;
2031 
2032   a->size           = oldmat->size;
2033   a->rank           = oldmat->rank;
2034   a->donotstash     = oldmat->donotstash;
2035   a->roworiented    = oldmat->roworiented;
2036   a->rowindices     = 0;
2037   a->rowvalues      = 0;
2038   a->getrowactive   = PETSC_FALSE;
2039 
2040   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2041   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2042 
2043   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2044   if (oldmat->colmap) {
2045 #if defined (PETSC_USE_CTABLE)
2046     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2047 #else
2048     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2049     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2050     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2051 #endif
2052   } else a->colmap = 0;
2053   if (oldmat->garray) {
2054     PetscInt len;
2055     len  = oldmat->B->cmap.n;
2056     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2057     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2058     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2059   } else a->garray = 0;
2060 
2061   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2062   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2063   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2064   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2065   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2066   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2067   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2068   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2069   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2070   *newmat = mat;
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 #include "petscsys.h"
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatLoad_MPIAIJ"
2078 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2079 {
2080   Mat            A;
2081   PetscScalar    *vals,*svals;
2082   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2083   MPI_Status     status;
2084   PetscErrorCode ierr;
2085   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2086   PetscInt       i,nz,j,rstart,rend,mmax;
2087   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2088   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2089   PetscInt       cend,cstart,n,*rowners;
2090   int            fd;
2091 
2092   PetscFunctionBegin;
2093   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2094   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2095   if (!rank) {
2096     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2097     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2098     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2099   }
2100 
2101   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2102   M = header[1]; N = header[2];
2103   /* determine ownership of all rows */
2104   m    = M/size + ((M % size) > rank);
2105   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2106   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2107 
2108   /* First process needs enough room for process with most rows */
2109   if (!rank) {
2110     mmax       = rowners[1];
2111     for (i=2; i<=size; i++) {
2112       mmax = PetscMax(mmax,rowners[i]);
2113     }
2114   } else mmax = m;
2115 
2116   rowners[0] = 0;
2117   for (i=2; i<=size; i++) {
2118     rowners[i] += rowners[i-1];
2119   }
2120   rstart = rowners[rank];
2121   rend   = rowners[rank+1];
2122 
2123   /* distribute row lengths to all processors */
2124   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2125   if (!rank) {
2126     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2127     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2128     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2129     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2130     for (j=0; j<m; j++) {
2131       procsnz[0] += ourlens[j];
2132     }
2133     for (i=1; i<size; i++) {
2134       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2135       /* calculate the number of nonzeros on each processor */
2136       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2137         procsnz[i] += rowlengths[j];
2138       }
2139       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2140     }
2141     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2142   } else {
2143     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2144   }
2145 
2146   if (!rank) {
2147     /* determine max buffer needed and allocate it */
2148     maxnz = 0;
2149     for (i=0; i<size; i++) {
2150       maxnz = PetscMax(maxnz,procsnz[i]);
2151     }
2152     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2153 
2154     /* read in my part of the matrix column indices  */
2155     nz   = procsnz[0];
2156     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2157     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2158 
2159     /* read in everyone else's part of the matrix and ship it off */
2160     for (i=1; i<size; i++) {
2161       nz   = procsnz[i];
2162       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2163       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2164     }
2165     ierr = PetscFree(cols);CHKERRQ(ierr);
2166   } else {
2167     /* determine buffer space needed for message */
2168     nz = 0;
2169     for (i=0; i<m; i++) {
2170       nz += ourlens[i];
2171     }
2172     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2173 
2174     /* receive message of column indices*/
2175     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2176     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2177     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2178   }
2179 
2180   /* determine column ownership if matrix is not square */
2181   if (N != M) {
2182     n      = N/size + ((N % size) > rank);
2183     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2184     cstart = cend - n;
2185   } else {
2186     cstart = rstart;
2187     cend   = rend;
2188     n      = cend - cstart;
2189   }
2190 
2191   /* loop over local rows, determining number of off diagonal entries */
2192   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2193   jj = 0;
2194   for (i=0; i<m; i++) {
2195     for (j=0; j<ourlens[i]; j++) {
2196       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2197       jj++;
2198     }
2199   }
2200 
2201   /* create our matrix */
2202   for (i=0; i<m; i++) {
2203     ourlens[i] -= offlens[i];
2204   }
2205   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2206   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2207   ierr = MatSetType(A,type);CHKERRQ(ierr);
2208   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2209 
2210   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2211   for (i=0; i<m; i++) {
2212     ourlens[i] += offlens[i];
2213   }
2214 
2215   if (!rank) {
2216     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2217 
2218     /* read in my part of the matrix numerical values  */
2219     nz   = procsnz[0];
2220     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2221 
2222     /* insert into matrix */
2223     jj      = rstart;
2224     smycols = mycols;
2225     svals   = vals;
2226     for (i=0; i<m; i++) {
2227       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2228       smycols += ourlens[i];
2229       svals   += ourlens[i];
2230       jj++;
2231     }
2232 
2233     /* read in other processors and ship out */
2234     for (i=1; i<size; i++) {
2235       nz   = procsnz[i];
2236       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2237       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2238     }
2239     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2240   } else {
2241     /* receive numeric values */
2242     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2243 
2244     /* receive message of values*/
2245     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2246     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2247     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2248 
2249     /* insert into matrix */
2250     jj      = rstart;
2251     smycols = mycols;
2252     svals   = vals;
2253     for (i=0; i<m; i++) {
2254       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2255       smycols += ourlens[i];
2256       svals   += ourlens[i];
2257       jj++;
2258     }
2259   }
2260   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2261   ierr = PetscFree(vals);CHKERRQ(ierr);
2262   ierr = PetscFree(mycols);CHKERRQ(ierr);
2263   ierr = PetscFree(rowners);CHKERRQ(ierr);
2264 
2265   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2266   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2267   *newmat = A;
2268   PetscFunctionReturn(0);
2269 }
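
/*
   Illustrative sketch (not part of the library): loading an MPIAIJ matrix from a PETSc
   binary file with the loader implemented above. The file name is hypothetical, and the
   binary viewer calling sequence is the one assumed for this PETSc version.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleLoadMatrix"
static PetscErrorCode ExampleLoadMatrix(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatLoad(viewer,MATMPIAIJ,A);CHKERRQ(ierr); /* rank 0 reads the file and distributes the rows */
  ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}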
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2273 /*
2274     Not great since it makes two copies of the submatrix: first a SeqAIJ copy on each
2275   process, and then the end result built by concatenating those local matrices.
2276   Writing it directly would look much like MatGetSubMatrices_MPIAIJ()
2277 */
2278 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2279 {
2280   PetscErrorCode ierr;
2281   PetscMPIInt    rank,size;
2282   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2283   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2284   Mat            *local,M,Mreuse;
2285   PetscScalar    *vwork,*aa;
2286   MPI_Comm       comm = mat->comm;
2287   Mat_SeqAIJ     *aij;
2288 
2289 
2290   PetscFunctionBegin;
2291   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2292   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2293 
2294   if (call ==  MAT_REUSE_MATRIX) {
2295     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2296     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2297     local = &Mreuse;
2298     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2299   } else {
2300     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2301     Mreuse = *local;
2302     ierr   = PetscFree(local);CHKERRQ(ierr);
2303   }
2304 
2305   /*
2306       m - number of local rows
2307       n - number of columns (same on all processors)
2308       rstart - first row in new global matrix generated
2309   */
2310   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2311   if (call == MAT_INITIAL_MATRIX) {
2312     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2313     ii  = aij->i;
2314     jj  = aij->j;
2315 
2316     /*
2317         Determine the number of non-zeros in the diagonal and off-diagonal
2318         portions of the matrix in order to do correct preallocation
2319     */
2320 
2321     /* first get start and end of "diagonal" columns */
2322     if (csize == PETSC_DECIDE) {
2323       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2324       if (mglobal == n) { /* square matrix */
2325 	nlocal = m;
2326       } else {
2327         nlocal = n/size + ((n % size) > rank);
2328       }
2329     } else {
2330       nlocal = csize;
2331     }
2332     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2333     rstart = rend - nlocal;
2334     if (rank == size - 1 && rend != n) {
2335       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2336     }
2337 
2338     /* next, compute all the lengths */
2339     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2340     olens = dlens + m;
2341     for (i=0; i<m; i++) {
2342       jend = ii[i+1] - ii[i];
2343       olen = 0;
2344       dlen = 0;
2345       for (j=0; j<jend; j++) {
2346         if (*jj < rstart || *jj >= rend) olen++;
2347         else dlen++;
2348         jj++;
2349       }
2350       olens[i] = olen;
2351       dlens[i] = dlen;
2352     }
2353     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2354     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2355     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2356     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2357     ierr = PetscFree(dlens);CHKERRQ(ierr);
2358   } else {
2359     PetscInt ml,nl;
2360 
2361     M = *newmat;
2362     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2363     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2364     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2365     /*
2366          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2367        rather than the slower MatSetValues().
2368     */
2369     M->was_assembled = PETSC_TRUE;
2370     M->assembled     = PETSC_FALSE;
2371   }
2372   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2373   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2374   ii  = aij->i;
2375   jj  = aij->j;
2376   aa  = aij->a;
2377   for (i=0; i<m; i++) {
2378     row   = rstart + i;
2379     nz    = ii[i+1] - ii[i];
2380     cwork = jj;     jj += nz;
2381     vwork = aa;     aa += nz;
2382     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2383   }
2384 
2385   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2386   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387   *newmat = M;
2388 
2389   /* save submatrix used in processor for next request */
2390   if (call ==  MAT_INITIAL_MATRIX) {
2391     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2392     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2393   }
2394 
2395   PetscFunctionReturn(0);
2396 }
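
/*
   Illustrative sketch (not part of the library): extracting a parallel submatrix with
   MatGetSubMatrix() using the routine above. Each process supplies the global rows it
   wants; the column index set is the same on every process, and PETSC_DECIDE lets PETSc
   pick the local column layout. The function name and the choice of index sets (here all
   of A, i.e. a plain copy) are hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleGetSubMatrix"
static PetscErrorCode ExampleGetSubMatrix(Mat A,Mat *Asub)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,M,N;
  IS             isrow,iscol;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = ISCreateStride(A->comm,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr); /* my locally owned rows */
  ierr = ISCreateStride(A->comm,N,0,1,&iscol);CHKERRQ(ierr);                /* all columns, same on every process */
  ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,Asub);CHKERRQ(ierr);
  ierr = ISDestroy(isrow);CHKERRQ(ierr);
  ierr = ISDestroy(iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}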
2397 
2398 EXTERN_C_BEGIN
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2401 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2402 {
2403   PetscInt       m,cstart, cend,j,nnz,i,d;
2404   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2405   const PetscInt *JJ;
2406   PetscScalar    *values;
2407   PetscErrorCode ierr;
2408 
2409   PetscFunctionBegin;
2410   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2411 
2412   B->rmap.bs = B->cmap.bs = 1;
2413   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2414   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2415   m      = B->rmap.n;
2416   cstart = B->cmap.rstart;
2417   cend   = B->cmap.rend;
2418   rstart = B->rmap.rstart;
2419 
2420   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2421   o_nnz = d_nnz + m;
2422 
2423   for (i=0; i<m; i++) {
2424     nnz     = Ii[i+1]- Ii[i];
2425     JJ      = J + Ii[i];
2426     nnz_max = PetscMax(nnz_max,nnz);
2427     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2428     for (j=0; j<nnz; j++) {
2429       if (*JJ >= cstart) break;
2430       JJ++;
2431     }
2432     d = 0;
2433     for (; j<nnz; j++) {
2434       if (*JJ++ >= cend) break;
2435       d++;
2436     }
2437     d_nnz[i] = d;
2438     o_nnz[i] = nnz - d;
2439   }
2440   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2441   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2442 
2443   if (v) values = (PetscScalar*)v;
2444   else {
2445     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2446     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2447   }
2448 
2449   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2450   for (i=0; i<m; i++) {
2451     ii   = i + rstart;
2452     nnz  = Ii[i+1]- Ii[i];
2453     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2454   }
2455   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2456   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2457   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2458 
2459   if (!v) {
2460     ierr = PetscFree(values);CHKERRQ(ierr);
2461   }
2462   PetscFunctionReturn(0);
2463 }
2464 EXTERN_C_END
2465 
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2468 /*@
2469    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2470    (the default parallel PETSc format).
2471 
2472    Collective on MPI_Comm
2473 
2474    Input Parameters:
2475 +  B - the matrix
2476 .  i - the indices into j for the start of each local row (starts with zero)
2477 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2478 -  v - optional values in the matrix
2479 
2480    Level: developer
2481 
2482    Notes: this routine copies the values from i[], j[], and v[] into PETSc's internal
2483      storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2484 
2485 .keywords: matrix, aij, compressed row, sparse, parallel
2486 
2487 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2488           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2489 @*/
2490 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2491 {
2492   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2493 
2494   PetscFunctionBegin;
2495   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2496   if (f) {
2497     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2498   }
2499   PetscFunctionReturn(0);
2500 }
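
/*
   Illustrative sketch (not part of the library): preallocating and filling an MPIAIJ matrix
   from local CSR arrays with MatMPIAIJSetPreallocationCSR(). Each process passes the row
   pointers, global column indices (sorted within each row) and values of the rows it owns;
   here every process contributes a 2x2 block on the global diagonal. The data and the
   function name are made up for illustration.
*/
#undef __FUNCT__
#define __FUNCT__ "ExamplePreallocationCSR"
static PetscErrorCode ExamplePreallocationCSR(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       ii[3],jj[4],rstart;
  PetscScalar    vv[4];

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  rstart = 2*rank;                          /* this process owns global rows rstart and rstart+1 */
  ii[0] = 0; ii[1] = 2; ii[2] = 4;          /* row pointers for the two local rows */
  jj[0] = rstart; jj[1] = rstart+1;         /* columns of row rstart   */
  jj[2] = rstart; jj[3] = rstart+1;         /* columns of row rstart+1 */
  vv[0] = 2.0; vv[1] = -1.0; vv[2] = -1.0; vv[3] = 2.0;

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,2,2,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*A,ii,jj,vv);CHKERRQ(ierr); /* also inserts the values and assembles */
  PetscFunctionReturn(0);
}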
2501 
2502 #undef __FUNCT__
2503 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2504 /*@C
2505    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2506    (the default parallel PETSc format).  For good matrix assembly performance
2507    the user should preallocate the matrix storage by setting the parameters
2508    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2509    performance can be increased by more than a factor of 50.
2510 
2511    Collective on MPI_Comm
2512 
2513    Input Parameters:
2514 +  A - the matrix
2515 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2516            (same value is used for all local rows)
2517 .  d_nnz - array containing the number of nonzeros in the various rows of the
2518            DIAGONAL portion of the local submatrix (possibly different for each row)
2519            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2520            The size of this array is equal to the number of local rows, i.e 'm'.
2521            You must leave room for the diagonal entry even if it is zero.
2522 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2523            submatrix (same value is used for all local rows).
2524 -  o_nnz - array containing the number of nonzeros in the various rows of the
2525            OFF-DIAGONAL portion of the local submatrix (possibly different for
2526            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2527            structure. The size of this array is equal to the number
2528            of local rows, i.e 'm'.
2529 
2530    If the *_nnz parameter is given then the *_nz parameter is ignored
2531 
2532    The AIJ format (also called the Yale sparse matrix format or
2533    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2534    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2535 
2536    The parallel matrix is partitioned such that the first m0 rows belong to
2537    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2538    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2539 
2540    The DIAGONAL portion of the local submatrix of a processor can be defined
2541    as the submatrix which is obtained by extracting the part corresponding
2542    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2543    first row that belongs to the processor, and r2 is the last row belonging
2544    to this processor. This is a square mxm matrix. The remaining portion
2545    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2546 
2547    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2548 
2549    Example usage:
2550 
2551    Consider the following 8x8 matrix with 34 non-zero values, that is
2552    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2553    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2554    as follows:
2555 
2556 .vb
2557             1  2  0  |  0  3  0  |  0  4
2558     Proc0   0  5  6  |  7  0  0  |  8  0
2559             9  0 10  | 11  0  0  | 12  0
2560     -------------------------------------
2561            13  0 14  | 15 16 17  |  0  0
2562     Proc1   0 18  0  | 19 20 21  |  0  0
2563             0  0  0  | 22 23  0  | 24  0
2564     -------------------------------------
2565     Proc2  25 26 27  |  0  0 28  | 29  0
2566            30  0  0  | 31 32 33  |  0 34
2567 .ve
2568 
2569    This can be represented as a collection of submatrices as:
2570 
2571 .vb
2572       A B C
2573       D E F
2574       G H I
2575 .ve
2576 
2577    Where the submatrices A,B,C are owned by proc0, D,E,F are
2578    owned by proc1, G,H,I are owned by proc2.
2579 
2580    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2581    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2582    The 'M','N' parameters are 8,8, and have the same values on all procs.
2583 
2584    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2585    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2586    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2587    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2588    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2589    matrix, and [DF] as another SeqAIJ matrix.
2590 
2591    When d_nz, o_nz parameters are specified, d_nz storage elements are
2592    allocated for every row of the local diagonal submatrix, and o_nz
2593    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2594    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2595    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2596    In this case, the values of d_nz,o_nz are:
2597 .vb
2598      proc0 : dnz = 2, o_nz = 2
2599      proc1 : dnz = 3, o_nz = 2
2600      proc2 : dnz = 1, o_nz = 4
2601 .ve
2602    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2603    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2604    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2605    34 values.
2606 
2607    When d_nnz, o_nnz parameters are specified, the storage is specified
2608    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2609    In the above case the values for d_nnz,o_nnz are:
2610 .vb
2611      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2612      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2613      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2614 .ve
2615    Here the space allocated is the sum of all the above values, i.e. 34, and
2616    hence the preallocation is perfect.
2617 
2618    Level: intermediate
2619 
2620 .keywords: matrix, aij, compressed row, sparse, parallel
2621 
2622 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2623           MPIAIJ
2624 @*/
2625 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2626 {
2627   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2628 
2629   PetscFunctionBegin;
2630   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2631   if (f) {
2632     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2633   }
2634   PetscFunctionReturn(0);
2635 }
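
/*
   Illustrative sketch (not part of the library): computing the d_nnz/o_nnz arrays described
   in the manual page above for a distributed tridiagonal matrix, where each process owns
   mlocal consecutive rows. Entries whose column lies inside the local ownership range are
   counted in d_nnz, all others in o_nnz. The function name is hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "ExamplePreallocateTridiagonal"
static PetscErrorCode ExamplePreallocateTridiagonal(MPI_Comm comm,PetscInt mlocal,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       i,rstart,rend,M,*d_nnz,*o_nnz;

  PetscFunctionBegin;
  ierr   = MPI_Scan(&mlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart = rend - mlocal;
  ierr   = MPI_Allreduce(&mlocal,&M,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);

  ierr  = PetscMalloc((2*mlocal+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + mlocal;
  for (i=0; i<mlocal; i++) {
    PetscInt r = rstart + i;
    d_nnz[i] = 1; o_nnz[i] = 0;                                   /* the diagonal entry */
    if (r-1 >= 0) { if (r-1 >= rstart) d_nnz[i]++; else o_nnz[i]++; }
    if (r+1 <  M) { if (r+1 <  rend)   d_nnz[i]++; else o_nnz[i]++; }
  }

  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);
  /* ... MatSetValues() for rows rstart..rend-1, then MatAssemblyBegin/End() ... */
  PetscFunctionReturn(0);
}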
2636 
2637 #undef __FUNCT__
2638 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
2639 /*@C
2640      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
2641          CSR format the local rows.
2642 
2643    Collective on MPI_Comm
2644 
2645    Input Parameters:
2646 +  comm - MPI communicator
2647 .  m - number of local rows (Cannot be PETSC_DECIDE)
2648 .  n - This value should be the same as the local size used in creating the
2649        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2650        calculated if N is given). For square matrices n is almost always m.
2651 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2652 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2653 .   i - row indices
2654 .   j - column indices
2655 -   a - matrix values
2656 
2657    Output Parameter:
2658 .   mat - the matrix
2659 
2660    Level: intermediate
2661 
2662    Notes:
2663        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2664      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2665      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2666 
2667        The i and j indices are 0 based
2668 
2669 .keywords: matrix, aij, compressed row, sparse, parallel
2670 
2671 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2672           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
2673 @*/
2674 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2675 {
2676   PetscErrorCode ierr;
2677 
2678   PetscFunctionBegin;
2679   if (i[0]) {
2680     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2681   }
2682   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2683   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
       ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2684   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
2685   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
2686   PetscFunctionReturn(0);
2687 }
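
/*
   Illustrative sketch (not part of the library): creating an MPIAIJ matrix in a single call
   from local CSR arrays with MatCreateMPIAIJWithArrays(). Each process contributes a 2x2
   block on the global diagonal; the data are made up for illustration. Since the arrays are
   copied, they may be freed or reused after the call. The function name is hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "ExampleCreateWithArrays"
static PetscErrorCode ExampleCreateWithArrays(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank;
  PetscInt       ii[3],jj[4],rstart;
  PetscScalar    vv[4];

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  rstart = 2*rank;                          /* this process owns global rows rstart and rstart+1 */
  ii[0] = 0; ii[1] = 2; ii[2] = 4;
  jj[0] = rstart; jj[1] = rstart+1; jj[2] = rstart; jj[3] = rstart+1;
  vv[0] = 2.0; vv[1] = -1.0; vv[2] = -1.0; vv[3] = 2.0;
  ierr = MatCreateMPIAIJWithArrays(comm,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,vv,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}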
2688 
2689 #undef __FUNCT__
2690 #define __FUNCT__ "MatCreateMPIAIJ"
2691 /*@C
2692    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2693    (the default parallel PETSc format).  For good matrix assembly performance
2694    the user should preallocate the matrix storage by setting the parameters
2695    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2696    performance can be increased by more than a factor of 50.
2697 
2698    Collective on MPI_Comm
2699 
2700    Input Parameters:
2701 +  comm - MPI communicator
2702 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2703            This value should be the same as the local size used in creating the
2704            y vector for the matrix-vector product y = Ax.
2705 .  n - This value should be the same as the local size used in creating the
2706        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2707        calculated if N is given). For square matrices n is almost always m.
2708 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2709 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2710 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2711            (same value is used for all local rows)
2712 .  d_nnz - array containing the number of nonzeros in the various rows of the
2713            DIAGONAL portion of the local submatrix (possibly different for each row)
2714            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2715            The size of this array is equal to the number of local rows, i.e 'm'.
2716            You must leave room for the diagonal entry even if it is zero.
2717 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2718            submatrix (same value is used for all local rows).
2719 -  o_nnz - array containing the number of nonzeros in the various rows of the
2720            OFF-DIAGONAL portion of the local submatrix (possibly different for
2721            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2722            structure. The size of this array is equal to the number
2723            of local rows, i.e 'm'.
2724 
2725    Output Parameter:
2726 .  A - the matrix
2727 
2728    Notes:
2729    If the *_nnz parameter is given then the *_nz parameter is ignored
2730 
2731    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2732    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2733    storage requirements for this matrix.
2734 
2735    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2736    processor then it must be used on all processors that share the object for
2737    that argument.
2738 
2739    The user MUST specify either the local or global matrix dimensions
2740    (possibly both).
2741 
2742    The parallel matrix is partitioned such that the first m0 rows belong to
2743    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2744    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2745 
2746    The DIAGONAL portion of the local submatrix of a processor can be defined
2747    as the submatrix which is obtained by extracting the part corresponding
2748    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2749    first row that belongs to the processor, and r2 is the last row belonging
2750    to this processor. This is a square mxm matrix. The remaining portion
2751    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2752 
2753    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2754 
2755    When calling this routine with a single process communicator, a matrix of
2756    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2757    type of communicator, use the construction mechanism:
2758      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2759 
2760    By default, this format uses inodes (identical nodes) when possible.
2761    We search for consecutive rows with the same nonzero structure, thereby
2762    reusing matrix information to achieve increased efficiency.
2763 
2764    Options Database Keys:
2765 +  -mat_no_inode  - Do not use inodes
2766 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2767 -  -mat_aij_oneindex - Internally use indexing starting at 1
2768         rather than 0.  Note that when calling MatSetValues(),
2769         the user still MUST index entries starting at 0!
2770 
2771 
2772    Example usage:
2773 
2774    Consider the following 8x8 matrix with 34 non-zero values that is
2775    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2776    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2777    as follows:
2778 
2779 .vb
2780             1  2  0  |  0  3  0  |  0  4
2781     Proc0   0  5  6  |  7  0  0  |  8  0
2782             9  0 10  | 11  0  0  | 12  0
2783     -------------------------------------
2784            13  0 14  | 15 16 17  |  0  0
2785     Proc1   0 18  0  | 19 20 21  |  0  0
2786             0  0  0  | 22 23  0  | 24  0
2787     -------------------------------------
2788     Proc2  25 26 27  |  0  0 28  | 29  0
2789            30  0  0  | 31 32 33  |  0 34
2790 .ve
2791 
2792    This can be represented as a collection of submatrices as:
2793 
2794 .vb
2795       A B C
2796       D E F
2797       G H I
2798 .ve
2799 
2800    Where the submatrices A,B,C are owned by proc0, D,E,F are
2801    owned by proc1, G,H,I are owned by proc2.
2802 
2803    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2804    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2805    The 'M','N' parameters are 8,8, and have the same values on all procs.
2806 
2807    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2808    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2809    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2810    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2811    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2812    matrix, and [DF] as another SeqAIJ matrix.
2813 
2814    When d_nz, o_nz parameters are specified, d_nz storage elements are
2815    allocated for every row of the local diagonal submatrix, and o_nz
2816    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2817    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2818    local row in the local DIAGONAL and OFF-DIAGONAL submatrices.
2819    In this case, the values of d_nz,o_nz are:
2820 .vb
2821      proc0 : d_nz = 2, o_nz = 2
2822      proc1 : d_nz = 3, o_nz = 2
2823      proc2 : d_nz = 1, o_nz = 4
2824 .ve
2825    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2826    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2827    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2828    34 values.
2829 
2830    When d_nnz, o_nnz parameters are specified, the storage is specified
2831    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2832    In the above case the values for d_nnz,o_nnz are:
2833 .vb
2834      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2835      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2836      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2837 .ve
2838    Here the space allocated is the sum of all the above values, i.e. 34, and
2839    hence the preallocation is perfect.
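
   A minimal calling sketch for the example above, shown for proc1 (the numbers are the
   illustrative values from this example, not data supplied by PETSc):
.vb
     Mat            A;
     PetscErrorCode ierr;
     PetscInt       d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve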
2840 
2841    Level: intermediate
2842 
2843 .keywords: matrix, aij, compressed row, sparse, parallel
2844 
2845 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2846           MPIAIJ, MatCreateMPIAIJWithArrays()
2847 @*/
2848 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2849 {
2850   PetscErrorCode ierr;
2851   PetscMPIInt    size;
2852 
2853   PetscFunctionBegin;
2854   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2855   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2856   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2857   if (size > 1) {
2858     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2859     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2860   } else {
2861     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2862     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2863   }
2864   PetscFunctionReturn(0);
2865 }
2866 
2867 #undef __FUNCT__
2868 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2869 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2870 {
2871   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2872 
2873   PetscFunctionBegin;
2874   *Ad     = a->A;
2875   *Ao     = a->B;
2876   *colmap = a->garray;
2877   PetscFunctionReturn(0);
2878 }
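
/*
   A minimal usage sketch for MatMPIAIJGetSeqAIJ(); A is assumed to be an assembled MPIAIJ
   matrix.  Ad is the local DIAGONAL block and Ao the local OFF-DIAGONAL block (both SeqAIJ);
   colmap[j] gives the global column index of local column j of Ao.

      Mat            Ad,Ao;
      PetscInt       *colmap;
      PetscErrorCode ierr;
      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/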
2879 
2880 #undef __FUNCT__
2881 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2882 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2883 {
2884   PetscErrorCode ierr;
2885   PetscInt       i;
2886   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2887 
2888   PetscFunctionBegin;
2889   if (coloring->ctype == IS_COLORING_GLOBAL) {
2890     ISColoringValue *allcolors,*colors;
2891     ISColoring      ocoloring;
2892 
2893     /* set coloring for diagonal portion */
2894     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2895 
2896     /* set coloring for off-diagonal portion */
2897     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2898     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2899     for (i=0; i<a->B->cmap.n; i++) {
2900       colors[i] = allcolors[a->garray[i]];
2901     }
2902     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2903     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2904     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2905     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2906   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2907     ISColoringValue *colors;
2908     PetscInt        *larray;
2909     ISColoring      ocoloring;
2910 
2911     /* set coloring for diagonal portion */
2912     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2913     for (i=0; i<a->A->cmap.n; i++) {
2914       larray[i] = i + A->cmap.rstart;
2915     }
2916     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2917     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2918     for (i=0; i<a->A->cmap.n; i++) {
2919       colors[i] = coloring->colors[larray[i]];
2920     }
2921     ierr = PetscFree(larray);CHKERRQ(ierr);
2922     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2923     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2924     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2925 
2926     /* set coloring for off-diagonal portion */
2927     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2928     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2929     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2930     for (i=0; i<a->B->cmap.n; i++) {
2931       colors[i] = coloring->colors[larray[i]];
2932     }
2933     ierr = PetscFree(larray);CHKERRQ(ierr);
2934     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2935     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2936     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2937   } else {
2938     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2939   }
2940 
2941   PetscFunctionReturn(0);
2942 }
2943 
2944 #if defined(PETSC_HAVE_ADIC)
2945 #undef __FUNCT__
2946 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2947 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2948 {
2949   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2950   PetscErrorCode ierr;
2951 
2952   PetscFunctionBegin;
2953   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2954   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2955   PetscFunctionReturn(0);
2956 }
2957 #endif
2958 
2959 #undef __FUNCT__
2960 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2961 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2962 {
2963   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2964   PetscErrorCode ierr;
2965 
2966   PetscFunctionBegin;
2967   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2968   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2969   PetscFunctionReturn(0);
2970 }
2971 
2972 #undef __FUNCT__
2973 #define __FUNCT__ "MatMerge"
2974 /*@C
2975       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2976                  matrices from each processor
2977 
2978     Collective on MPI_Comm
2979 
2980    Input Parameters:
2981 +    comm - the communicator the parallel matrix will live on
2982 .    inmat - the input sequential matrix on each processor
2983 .    n - number of local columns (or PETSC_DECIDE)
2984 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2985 
2986    Output Parameter:
2987 .    outmat - the parallel matrix generated
2988 
2989     Level: advanced
2990 
2991    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2992 
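   Example usage (a minimal sketch; Aseq is assumed to be a SeqAIJ matrix already assembled
   on each process):
.vb
     Mat Aseq,Apar;
     ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);CHKERRQ(ierr);
.ve
   Note that the input matrix is destroyed by this routine (MatDestroy() is called on inmat).
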
2993 @*/
2994 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2995 {
2996   PetscErrorCode ierr;
2997   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
2998   PetscInt       *indx;
2999   PetscScalar    *values;
3000 
3001   PetscFunctionBegin;
3002   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3003   if (scall == MAT_INITIAL_MATRIX){
3004     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3005     if (n == PETSC_DECIDE){
3006       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3007     }
3008     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3009     rstart -= m;
3010 
3011     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3012     for (i=0;i<m;i++) {
3013       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3014       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3015       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3016     }
3017     /* This routine will ONLY return an MPIAIJ type matrix */
3018     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3019     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3020     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3021     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3022     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3023 
3024   } else if (scall == MAT_REUSE_MATRIX){
3025     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3026   } else {
3027     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3028   }
3029 
3030   for (i=0;i<m;i++) {
3031     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3032     Ii    = i + rstart;
3033     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3034     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3035   }
3036   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3037   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3038   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3039 
3040   PetscFunctionReturn(0);
3041 }
3042 
3043 #undef __FUNCT__
3044 #define __FUNCT__ "MatFileSplit"
3045 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3046 {
3047   PetscErrorCode    ierr;
3048   PetscMPIInt       rank;
3049   PetscInt          m,N,i,rstart,nnz;
3050   size_t            len;
3051   const PetscInt    *indx;
3052   PetscViewer       out;
3053   char              *name;
3054   Mat               B;
3055   const PetscScalar *values;
3056 
3057   PetscFunctionBegin;
3058   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3059   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3060   /* Should this be the type of the diagonal block of A? */
3061   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3062   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3063   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3064   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3065   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3066   for (i=0;i<m;i++) {
3067     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3068     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3069     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3070   }
3071   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3072   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3073 
3074   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3075   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3076   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3077   sprintf(name,"%s.%d",outfile,rank);
3078   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3079   ierr = PetscFree(name);CHKERRQ(ierr);
3080   ierr = MatView(B,out);CHKERRQ(ierr);
3081   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3082   ierr = MatDestroy(B);CHKERRQ(ierr);
3083   PetscFunctionReturn(0);
3084 }
3085 
3086 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3087 #undef __FUNCT__
3088 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3089 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3090 {
3091   PetscErrorCode       ierr;
3092   Mat_Merge_SeqsToMPI  *merge;
3093   PetscContainer       container;
3094 
3095   PetscFunctionBegin;
3096   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3097   if (container) {
3098     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3099     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3100     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3101     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3102     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3103     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3104     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3105     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3106     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3107     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3108     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3109     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3110 
3111     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3112     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3113   }
3114   ierr = PetscFree(merge);CHKERRQ(ierr);
3115 
3116   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3117   PetscFunctionReturn(0);
3118 }
3119 
3120 #include "src/mat/utils/freespace.h"
3121 #include "petscbt.h"
3122 static PetscEvent logkey_seqstompinum = 0;
3123 #undef __FUNCT__
3124 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3125 /*@C
3126       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
3127                  matrices from each processor
3128 
3129     Collective on MPI_Comm
3130 
3131    Input Parameters:
3132 +    comm - the communicator the parallel matrix will live on
3133 .    seqmat - the input sequential matrix on each processor
3134 .    m - number of local rows (or PETSC_DECIDE)
3135 .    n - number of local columns (or PETSC_DECIDE)
3136 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3137 
3138    Output Parameter:
3139 .    mpimat - the parallel matrix generated
3140 
3141     Level: advanced
3142 
3143    Notes:
3144      The dimensions of the sequential matrix in each processor MUST be the same.
3145      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
3146      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
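
   A minimal usage sketch via the driver routine MatMerge_SeqsToMPI() (seqmat is the sequential
   matrix described above); the symbolic phase runs only on the first call:
.vb
     Mat C;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     /* later, after only the numerical values of seqmat have changed */
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
.ve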
3147 @*/
3148 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3149 {
3150   PetscErrorCode       ierr;
3151   MPI_Comm             comm=mpimat->comm;
3152   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3153   PetscMPIInt          size,rank,taga,*len_s;
3154   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3155   PetscInt             proc,m;
3156   PetscInt             **buf_ri,**buf_rj;
3157   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3158   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3159   MPI_Request          *s_waits,*r_waits;
3160   MPI_Status           *status;
3161   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3162   Mat_Merge_SeqsToMPI  *merge;
3163   PetscContainer       container;
3164 
3165   PetscFunctionBegin;
3166   if (!logkey_seqstompinum) {
3167     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3168   }
3169   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3170 
3171   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3172   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3173 
3174   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3175   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have a MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
3176   ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3178   bi     = merge->bi;
3179   bj     = merge->bj;
3180   buf_ri = merge->buf_ri;
3181   buf_rj = merge->buf_rj;
3182 
3183   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3184   owners = merge->rowmap.range;
3185   len_s  = merge->len_s;
3186 
3187   /* send and recv matrix values */
3188   /*-----------------------------*/
3189   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3190   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3191 
3192   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3193   for (proc=0,k=0; proc<size; proc++){
3194     if (!len_s[proc]) continue;
3195     i = owners[proc];
3196     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3197     k++;
3198   }
3199 
3200   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3201   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3202   ierr = PetscFree(status);CHKERRQ(ierr);
3203 
3204   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3205   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3206 
3207   /* insert mat values of mpimat */
3208   /*----------------------------*/
3209   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3210   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3211   nextrow = buf_ri_k + merge->nrecv;
3212   nextai  = nextrow + merge->nrecv;
3213 
3214   for (k=0; k<merge->nrecv; k++){
3215     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3216     nrows = *(buf_ri_k[k]);
3217     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
3218     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3219   }
3220 
3221   /* set values of ba */
3222   m = merge->rowmap.n;
3223   for (i=0; i<m; i++) {
3224     arow = owners[rank] + i;
3225     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3226     bnzi = bi[i+1] - bi[i];
3227     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3228 
3229     /* add local non-zero vals of this proc's seqmat into ba */
3230     anzi = ai[arow+1] - ai[arow];
3231     aj   = a->j + ai[arow];
3232     aa   = a->a + ai[arow];
3233     nextaj = 0;
3234     for (j=0; nextaj<anzi; j++){
3235       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3236         ba_i[j] += aa[nextaj++];
3237       }
3238     }
3239 
3240     /* add received vals into ba */
3241     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3242       /* i-th row */
3243       if (i == *nextrow[k]) {
3244         anzi = *(nextai[k]+1) - *nextai[k];
3245         aj   = buf_rj[k] + *(nextai[k]);
3246         aa   = abuf_r[k] + *(nextai[k]);
3247         nextaj = 0;
3248         for (j=0; nextaj<anzi; j++){
3249           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3250             ba_i[j] += aa[nextaj++];
3251           }
3252         }
3253         nextrow[k]++; nextai[k]++;
3254       }
3255     }
3256     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3257   }
3258   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3259   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3260 
3261   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3262   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3263   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3264   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3265   PetscFunctionReturn(0);
3266 }
3267 
3268 static PetscEvent logkey_seqstompisym = 0;
3269 #undef __FUNCT__
3270 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3271 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3272 {
3273   PetscErrorCode       ierr;
3274   Mat                  B_mpi;
3275   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3276   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3277   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3278   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3279   PetscInt             len,proc,*dnz,*onz;
3280   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3281   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3282   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3283   MPI_Status           *status;
3284   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3285   PetscBT              lnkbt;
3286   Mat_Merge_SeqsToMPI  *merge;
3287   PetscContainer       container;
3288 
3289   PetscFunctionBegin;
3290   if (!logkey_seqstompisym) {
3291     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3292   }
3293   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3294 
3295   /* make sure it is a PETSc comm */
3296   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3297   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3298   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3299 
3300   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3301   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3302 
3303   /* determine row ownership */
3304   /*---------------------------------------------------------*/
3305   merge->rowmap.n = m;
3306   merge->rowmap.N = M;
3307   merge->rowmap.bs = 1;
3308   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3309   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3310   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3311 
3312   m      = merge->rowmap.n;
3313   M      = merge->rowmap.N;
3314   owners = merge->rowmap.range;
3315 
3316   /* determine the number of messages to send, their lengths */
3317   /*---------------------------------------------------------*/
3318   len_s  = merge->len_s;
3319 
3320   len = 0;  /* length of buf_si[] */
3321   merge->nsend = 0;
3322   for (proc=0; proc<size; proc++){
3323     len_si[proc] = 0;
3324     if (proc == rank){
3325       len_s[proc] = 0;
3326     } else {
3327       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3328       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3329     }
3330     if (len_s[proc]) {
3331       merge->nsend++;
3332       nrows = 0;
3333       for (i=owners[proc]; i<owners[proc+1]; i++){
3334         if (ai[i+1] > ai[i]) nrows++;
3335       }
3336       len_si[proc] = 2*(nrows+1);
3337       len += len_si[proc];
3338     }
3339   }
3340 
3341   /* determine the number and length of messages to receive for ij-structure */
3342   /*-------------------------------------------------------------------------*/
3343   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3344   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3345 
3346   /* post the Irecv of j-structure */
3347   /*-------------------------------*/
3348   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3349   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3350 
3351   /* post the Isend of j-structure */
3352   /*--------------------------------*/
3353   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3354   sj_waits = si_waits + merge->nsend;
3355 
3356   for (proc=0, k=0; proc<size; proc++){
3357     if (!len_s[proc]) continue;
3358     i = owners[proc];
3359     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3360     k++;
3361   }
3362 
3363   /* receives and sends of j-structure are complete */
3364   /*------------------------------------------------*/
3365   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3366   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3367 
3368   /* send and recv i-structure */
3369   /*---------------------------*/
3370   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3371   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3372 
3373   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3374   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3375   for (proc=0,k=0; proc<size; proc++){
3376     if (!len_s[proc]) continue;
3377     /* form outgoing message for i-structure:
3378          buf_si[0]:                 nrows to be sent
3379                [1:nrows]:           row index (global)
3380                [nrows+1:2*nrows+1]: i-structure index
3381     */
3382     /*-------------------------------------------*/
3383     nrows = len_si[proc]/2 - 1;
3384     buf_si_i    = buf_si + nrows+1;
3385     buf_si[0]   = nrows;
3386     buf_si_i[0] = 0;
3387     nrows = 0;
3388     for (i=owners[proc]; i<owners[proc+1]; i++){
3389       anzi = ai[i+1] - ai[i];
3390       if (anzi) {
3391         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3392         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3393         nrows++;
3394       }
3395     }
3396     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3397     k++;
3398     buf_si += len_si[proc];
3399   }
3400 
3401   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3402   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3403 
3404   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3405   for (i=0; i<merge->nrecv; i++){
3406     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3407   }
3408 
3409   ierr = PetscFree(len_si);CHKERRQ(ierr);
3410   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3411   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3412   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3413   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3414   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3415   ierr = PetscFree(status);CHKERRQ(ierr);
3416 
3417   /* compute a local seq matrix in each processor */
3418   /*----------------------------------------------*/
3419   /* allocate bi array and free space for accumulating nonzero column info */
3420   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3421   bi[0] = 0;
3422 
3423   /* create and initialize a linked list */
3424   nlnk = N+1;
3425   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3426 
3427   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3428   len = 0;
3429   len  = ai[owners[rank+1]] - ai[owners[rank]];
3430   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3431   current_space = free_space;
3432 
3433   /* determine symbolic info for each local row */
3434   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3435   nextrow = buf_ri_k + merge->nrecv;
3436   nextai  = nextrow + merge->nrecv;
3437   for (k=0; k<merge->nrecv; k++){
3438     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3439     nrows = *buf_ri_k[k];
3440     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3441     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure */
3442   }
3443 
3444   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3445   len = 0;
3446   for (i=0;i<m;i++) {
3447     bnzi   = 0;
3448     /* add local non-zero cols of this proc's seqmat into lnk */
3449     arow   = owners[rank] + i;
3450     anzi   = ai[arow+1] - ai[arow];
3451     aj     = a->j + ai[arow];
3452     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3453     bnzi += nlnk;
3454     /* add received col data into lnk */
3455     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3456       if (i == *nextrow[k]) { /* i-th row */
3457         anzi = *(nextai[k]+1) - *nextai[k];
3458         aj   = buf_rj[k] + *nextai[k];
3459         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3460         bnzi += nlnk;
3461         nextrow[k]++; nextai[k]++;
3462       }
3463     }
3464     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3465 
3466     /* if free space is not available, make more free space */
3467     if (current_space->local_remaining<bnzi) {
3468       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3469       nspacedouble++;
3470     }
3471     /* copy data into free space, then initialize lnk */
3472     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3473     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3474 
3475     current_space->array           += bnzi;
3476     current_space->local_used      += bnzi;
3477     current_space->local_remaining -= bnzi;
3478 
3479     bi[i+1] = bi[i] + bnzi;
3480   }
3481 
3482   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3483 
3484   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3485   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3486   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3487 
3488   /* create symbolic parallel matrix B_mpi */
3489   /*---------------------------------------*/
3490   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3491   if (n==PETSC_DECIDE) {
3492     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3493   } else {
3494     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3495   }
3496   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3497   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3498   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3499 
3500   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3501   B_mpi->assembled     = PETSC_FALSE;
3502   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3503   merge->bi            = bi;
3504   merge->bj            = bj;
3505   merge->buf_ri        = buf_ri;
3506   merge->buf_rj        = buf_rj;
3507   merge->coi           = PETSC_NULL;
3508   merge->coj           = PETSC_NULL;
3509   merge->owners_co     = PETSC_NULL;
3510 
3511   /* attach the supporting struct to B_mpi for reuse */
3512   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3513   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3514   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3515   *mpimat = B_mpi;
3516 
3517   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3518   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3519   PetscFunctionReturn(0);
3520 }
3521 
3522 static PetscEvent logkey_seqstompi = 0;
3523 #undef __FUNCT__
3524 #define __FUNCT__ "MatMerge_SeqsToMPI"
3525 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3526 {
3527   PetscErrorCode   ierr;
3528 
3529   PetscFunctionBegin;
3530   if (!logkey_seqstompi) {
3531     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3532   }
3533   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3534   if (scall == MAT_INITIAL_MATRIX){
3535     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3536   }
3537   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3538   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3539   PetscFunctionReturn(0);
3540 }
3541 static PetscEvent logkey_getlocalmat = 0;
3542 #undef __FUNCT__
3543 #define __FUNCT__ "MatGetLocalMat"
3544 /*@C
3545      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A
3546 
3547     Not Collective
3548 
3549    Input Parameters:
3550 +    A - the matrix
3551 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3552 
3553    Output Parameter:
3554 .    A_loc - the local sequential matrix generated
3555 
3556     Level: developer
3557 
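   A minimal usage sketch (A is assumed to be an assembled MPIAIJ matrix):
.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... the numerical values of A change, the nonzero pattern does not ... */
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve
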
3558 @*/
3559 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3560 {
3561   PetscErrorCode  ierr;
3562   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3563   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3564   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3565   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3566   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3567   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3568 
3569   PetscFunctionBegin;
3570   if (!logkey_getlocalmat) {
3571     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3572   }
3573   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3574   if (scall == MAT_INITIAL_MATRIX){
3575     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3576     ci[0] = 0;
3577     for (i=0; i<am; i++){
3578       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3579     }
3580     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3581     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3582     k = 0;
3583     for (i=0; i<am; i++) {
3584       ncols_o = bi[i+1] - bi[i];
3585       ncols_d = ai[i+1] - ai[i];
3586       /* off-diagonal portion of A */
3587       for (jo=0; jo<ncols_o; jo++) {
3588         col = cmap[*bj];
3589         if (col >= cstart) break;
3590         cj[k]   = col; bj++;
3591         ca[k++] = *ba++;
3592       }
3593       /* diagonal portion of A */
3594       for (j=0; j<ncols_d; j++) {
3595         cj[k]   = cstart + *aj++;
3596         ca[k++] = *aa++;
3597       }
3598       /* off-diagonal portion of A */
3599       for (j=jo; j<ncols_o; j++) {
3600         cj[k]   = cmap[*bj++];
3601         ca[k++] = *ba++;
3602       }
3603     }
3604     /* put together the new matrix */
3605     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3606     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3607     /* Since these are PETSc arrays, change flags to free them as necessary. */
3608     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3609     mat->free_a  = PETSC_TRUE;
3610     mat->free_ij = PETSC_TRUE;
3611     mat->nonew   = 0;
3612   } else if (scall == MAT_REUSE_MATRIX){
3613     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3614     ci = mat->i; cj = mat->j; ca = mat->a;
3615     for (i=0; i<am; i++) {
3616       /* off-diagonal portion of A */
3617       ncols_o = bi[i+1] - bi[i];
3618       for (jo=0; jo<ncols_o; jo++) {
3619         col = cmap[*bj];
3620         if (col >= cstart) break;
3621         *ca++ = *ba++; bj++;
3622       }
3623       /* diagonal portion of A */
3624       ncols_d = ai[i+1] - ai[i];
3625       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3626       /* off-diagonal portion of A */
3627       for (j=jo; j<ncols_o; j++) {
3628         *ca++ = *ba++; bj++;
3629       }
3630     }
3631   } else {
3632     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3633   }
3634 
3635   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3636   PetscFunctionReturn(0);
3637 }
3638 
3639 static PetscEvent logkey_getlocalmatcondensed = 0;
3640 #undef __FUNCT__
3641 #define __FUNCT__ "MatGetLocalMatCondensed"
3642 /*@C
3643      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and NON-ZERO columns of A
3644 
3645     Not Collective
3646 
3647    Input Parameters:
3648 +    A - the matrix
3649 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3650 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3651 
3652    Output Parameter:
3653 .    A_loc - the local sequential matrix generated
3654 
3655     Level: developer
3656 
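   A minimal usage sketch (A is assumed to be an assembled MPIAIJ matrix; passing PETSC_NULL
   for row and col lets the routine build the index sets itself):
.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve
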
3657 @*/
3658 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3659 {
3660   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3661   PetscErrorCode    ierr;
3662   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3663   IS                isrowa,iscola;
3664   Mat               *aloc;
3665 
3666   PetscFunctionBegin;
3667   if (!logkey_getlocalmatcondensed) {
3668     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3669   }
3670   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3671   if (!row){
3672     start = A->rmap.rstart; end = A->rmap.rend;
3673     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3674   } else {
3675     isrowa = *row;
3676   }
3677   if (!col){
3678     start = A->cmap.rstart;
3679     cmap  = a->garray;
3680     nzA   = a->A->cmap.n;
3681     nzB   = a->B->cmap.n;
3682     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3683     ncols = 0;
3684     for (i=0; i<nzB; i++) {
3685       if (cmap[i] < start) idx[ncols++] = cmap[i];
3686       else break;
3687     }
3688     imark = i;
3689     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3690     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3691     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3692     ierr = PetscFree(idx);CHKERRQ(ierr);
3693   } else {
3694     iscola = *col;
3695   }
3696   if (scall != MAT_INITIAL_MATRIX){
3697     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3698     aloc[0] = *A_loc;
3699   }
3700   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3701   *A_loc = aloc[0];
3702   ierr = PetscFree(aloc);CHKERRQ(ierr);
3703   if (!row){
3704     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3705   }
3706   if (!col){
3707     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3708   }
3709   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3710   PetscFunctionReturn(0);
3711 }
3712 
3713 static PetscEvent logkey_GetBrowsOfAcols = 0;
3714 #undef __FUNCT__
3715 #define __FUNCT__ "MatGetBrowsOfAcols"
3716 /*@C
3717     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3718 
3719     Collective on Mat
3720 
3721    Input Parameters:
3722 +    A,B - the matrices in mpiaij format
3723 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3724 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3725 
3726    Output Parameter:
3727 +    rowb, colb - index sets of rows and columns of B to extract
3728 .    brstart - row index of B_seq at which the B->rmap.n local rows of B begin
3729 -    B_seq - the sequential matrix generated
3730 
3731     Level: developer
3732 
3733 @*/
3734 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3735 {
3736   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3737   PetscErrorCode    ierr;
3738   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3739   IS                isrowb,iscolb;
3740   Mat               *bseq;
3741 
3742   PetscFunctionBegin;
3743   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3744     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3745   }
3746   if (!logkey_GetBrowsOfAcols) {
3747     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3748   }
3749   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3750 
3751   if (scall == MAT_INITIAL_MATRIX){
3752     start = A->cmap.rstart;
3753     cmap  = a->garray;
3754     nzA   = a->A->cmap.n;
3755     nzB   = a->B->cmap.n;
3756     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3757     ncols = 0;
3758     for (i=0; i<nzB; i++) {  /* row < local row index */
3759       if (cmap[i] < start) idx[ncols++] = cmap[i];
3760       else break;
3761     }
3762     imark = i;
3763     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3764     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3765     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3766     ierr = PetscFree(idx);CHKERRQ(ierr);
3767     *brstart = imark;
3768     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
3769   } else {
3770     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3771     isrowb = *rowb; iscolb = *colb;
3772     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3773     bseq[0] = *B_seq;
3774   }
3775   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3776   *B_seq = bseq[0];
3777   ierr = PetscFree(bseq);CHKERRQ(ierr);
3778   if (!rowb){
3779     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3780   } else {
3781     *rowb = isrowb;
3782   }
3783   if (!colb){
3784     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3785   } else {
3786     *colb = iscolb;
3787   }
3788   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3789   PetscFunctionReturn(0);
3790 }
3791 
3792 static PetscEvent logkey_GetBrowsOfAocols = 0;
3793 #undef __FUNCT__
3794 #define __FUNCT__ "MatGetBrowsOfAoCols"
3795 /*@C
3796     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3797     of the OFF-DIAGONAL portion of local A
3798 
3799     Collective on Mat
3800 
3801    Input Parameters:
3802 +    A,B - the matrices in mpiaij format
3803 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3804 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3805 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3806 
3807    Output Parameter:
3808 .    B_oth - the sequential matrix generated
3809 
3810     Level: developer
3811 
3812 @*/
3813 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3814 {
3815   VecScatter_MPI_General *gen_to,*gen_from;
3816   PetscErrorCode         ierr;
3817   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
3818   Mat_SeqAIJ             *b_oth;
3819   VecScatter             ctx=a->Mvctx;
3820   MPI_Comm               comm=ctx->comm;
3821   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3822   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
3823   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3824   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3825   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
3826   MPI_Status             *sstatus,rstatus;
3827   PetscInt               *cols,sbs,rbs;
3828   PetscScalar            *vals;
3829 
3830   PetscFunctionBegin;
3831   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3832     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3833   }
3834   if (!logkey_GetBrowsOfAocols) {
3835     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3836   }
3837   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3838   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3839 
3840   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3841   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3842   rvalues  = gen_from->values; /* holds the length of receiving row */
3843   svalues  = gen_to->values;   /* holds the length of sending row */
3844   nrecvs   = gen_from->n;
3845   nsends   = gen_to->n;
3846 
3847   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
3848   srow     = gen_to->indices;   /* local row index to be sent */
3849   sstarts  = gen_to->starts;
3850   sprocs   = gen_to->procs;
3851   sstatus  = gen_to->sstatus;
3852   sbs      = gen_to->bs;
3853   rstarts  = gen_from->starts;
3854   rprocs   = gen_from->procs;
3855   rbs      = gen_from->bs;
3856 
3857   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3858   if (scall == MAT_INITIAL_MATRIX){
3859     /* i-array */
3860     /*---------*/
3861     /*  post receives */
3862     for (i=0; i<nrecvs; i++){
3863       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
3864       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
3865       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3866     }
3867 
3868     /* pack the outgoing message */
3869     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3870     rstartsj = sstartsj + nsends +1;
3871     sstartsj[0] = 0;  rstartsj[0] = 0;
3872     len = 0; /* total length of j or a array to be sent */
3873     k = 0;
3874     for (i=0; i<nsends; i++){
3875       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
3876       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
3877       for (j=0; j<nrows; j++) {
3878         row = srow[k] + B->rmap.range[rank]; /* global row idx */
3879         for (l=0; l<sbs; l++){
3880           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3881           rowlen[j*sbs+l] = ncols;
3882           len += ncols;
3883           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3884         }
3885         k++;
3886       }
3887       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3888       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3889     }
3890     /* recvs and sends of i-array are completed */
3891     i = nrecvs;
3892     while (i--) {
3893       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3894     }
3895     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3896 
3897     /* allocate buffers for sending j and a arrays */
3898     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3899     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3900 
3901     /* create i-array of B_oth */
3902     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3903     b_othi[0] = 0;
3904     len = 0; /* total length of j or a array to be received */
3905     k = 0;
3906     for (i=0; i<nrecvs; i++){
3907       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
3908       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
3909       for (j=0; j<nrows; j++) {
3910         b_othi[k+1] = b_othi[k] + rowlen[j];
3911         len += rowlen[j]; k++;
3912       }
3913       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3914     }
3915 
3916     /* allocate space for j and a arrays of B_oth */
3917     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3918     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3919 
3920     /* j-array */
3921     /*---------*/
3922     /*  post receives of j-array */
3923     for (i=0; i<nrecvs; i++){
3924       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3925       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3926     }
3927 
3928     /* pack the outgoing message j-array */
3929     k = 0;
3930     for (i=0; i<nsends; i++){
3931       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
3932       bufJ = bufj+sstartsj[i];
3933       for (j=0; j<nrows; j++) {
3934         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3935         for (ll=0; ll<sbs; ll++){
3936           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3937           for (l=0; l<ncols; l++){
3938             *bufJ++ = cols[l];
3939           }
3940           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3941         }
3942       }
3943       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3944     }
3945 
3946     /* recvs and sends of j-array are completed */
3947     i = nrecvs;
3948     while (i--) {
3949       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3950     }
3951     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3952   } else if (scall == MAT_REUSE_MATRIX){
3953     sstartsj = *startsj;
3954     rstartsj = sstartsj + nsends +1;
3955     bufa     = *bufa_ptr;
3956     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3957     b_otha   = b_oth->a;
3958   } else {
3959     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3960   }
3961 
3962   /* a-array */
3963   /*---------*/
3964   /*  post receives of a-array */
3965   for (i=0; i<nrecvs; i++){
3966     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3967     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3968   }
3969 
3970   /* pack the outgoing message a-array */
3971   k = 0;
3972   for (i=0; i<nsends; i++){
3973     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
3974     bufA = bufa+sstartsj[i];
3975     for (j=0; j<nrows; j++) {
3976       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3977       for (ll=0; ll<sbs; ll++){
3978         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3979         for (l=0; l<ncols; l++){
3980           *bufA++ = vals[l];
3981         }
3982         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3983       }
3984     }
3985     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3986   }
3987   /* recvs and sends of a-array are completed */
3988   i = nrecvs;
3989   while (i--) {
3990     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3991   }
3992   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3993   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
3994 
3995   if (scall == MAT_INITIAL_MATRIX){
3996     /* put together the new matrix */
3997     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3998 
3999     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4000     /* Since these are PETSc arrays, change flags to free them as necessary. */
4001     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4002     b_oth->free_a  = PETSC_TRUE;
4003     b_oth->free_ij = PETSC_TRUE;
4004     b_oth->nonew   = 0;
4005 
4006     ierr = PetscFree(bufj);CHKERRQ(ierr);
4007     if (!startsj || !bufa_ptr){
4008       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4009       ierr = PetscFree(bufa);CHKERRQ(ierr);
4010     } else {
4011       *startsj  = sstartsj;
4012       *bufa_ptr = bufa;
4013     }
4014   }
4015   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4016   PetscFunctionReturn(0);
4017 }
4018 
4019 #undef __FUNCT__
4020 #define __FUNCT__ "MatGetCommunicationStructs"
4021 /*@C
4022   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4023 
4024   Not Collective
4025 
4026   Input Parameters:
4027 . A - The matrix in mpiaij format
4028 
4029   Output Parameter:
4030 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4031 . colmap - A map from global column index to local index into lvec
4032 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4033 
4034   Level: developer
4035 
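  A minimal usage sketch, shown for the case where PETSC_USE_CTABLE is not defined (with
  PETSC_USE_CTABLE, colmap is a PetscTable instead of a PetscInt array):
.vb
    Vec        lvec;
    PetscInt   *colmap;
    VecScatter Mvctx;
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
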
4036 @*/
4037 #if defined (PETSC_USE_CTABLE)
4038 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4039 #else
4040 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4041 #endif
4042 {
4043   Mat_MPIAIJ *a;
4044 
4045   PetscFunctionBegin;
4046   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4047   PetscValidPointer(lvec, 2)
4048   PetscValidPointer(colmap, 3)
4049   PetscValidPointer(multScatter, 4)
4050   a = (Mat_MPIAIJ *) A->data;
4051   if (lvec) *lvec = a->lvec;
4052   if (colmap) *colmap = a->colmap;
4053   if (multScatter) *multScatter = a->Mvctx;
4054   PetscFunctionReturn(0);
4055 }
4056 
4057 EXTERN_C_BEGIN
4058 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4059 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4060 EXTERN_C_END
4061 
4062 /*MC
4063    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4064 
4065    Options Database Keys:
4066 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4067 
4068   Level: beginner
4069 
4070 .seealso: MatCreateMPIAIJ
4071 M*/
4072 
4073 EXTERN_C_BEGIN
4074 #undef __FUNCT__
4075 #define __FUNCT__ "MatCreate_MPIAIJ"
4076 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4077 {
4078   Mat_MPIAIJ     *b;
4079   PetscErrorCode ierr;
4080   PetscMPIInt    size;
4081 
4082   PetscFunctionBegin;
4083   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4084 
4085   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
4086   B->data         = (void*)b;
4087   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4088   B->factor       = 0;
4089   B->rmap.bs      = 1;
4090   B->assembled    = PETSC_FALSE;
4091   B->mapping      = 0;
4092 
4093   B->insertmode      = NOT_SET_VALUES;
4094   b->size            = size;
4095   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4096 
4097   /* build cache for off array entries formed */
4098   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4099   b->donotstash  = PETSC_FALSE;
4100   b->colmap      = 0;
4101   b->garray      = 0;
4102   b->roworiented = PETSC_TRUE;
4103 
4104   /* stuff used for matrix vector multiply */
4105   b->lvec      = PETSC_NULL;
4106   b->Mvctx     = PETSC_NULL;
4107 
4108   /* stuff for MatGetRow() */
4109   b->rowindices   = 0;
4110   b->rowvalues    = 0;
4111   b->getrowactive = PETSC_FALSE;
4112 
4113 
4114   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4115                                      "MatStoreValues_MPIAIJ",
4116                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4117   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4118                                      "MatRetrieveValues_MPIAIJ",
4119                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4120   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4121                                      "MatGetDiagonalBlock_MPIAIJ",
4122                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4123   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4124                                      "MatIsTranspose_MPIAIJ",
4125                                      MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4126   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4127                                      "MatMPIAIJSetPreallocation_MPIAIJ",
4128                                      MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4129   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4130                                      "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4131                                      MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4132   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4133                                      "MatDiagonalScaleLocal_MPIAIJ",
4134                                      MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4135   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4136                                      "MatConvert_MPIAIJ_MPICSRPERM",
4137                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4138   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4139                                      "MatConvert_MPIAIJ_MPICRL",
4140                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4141   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4142   PetscFunctionReturn(0);
4143 }
4144 EXTERN_C_END
4145 
4146 #undef __FUNCT__
4147 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4148 /*@C
4149      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
4150          and "off-diagonal" part of the matrix in CSR format.
4151 
4152    Collective on MPI_Comm
4153 
4154    Input Parameters:
4155 +  comm - MPI communicator
4156 .  m - number of local rows (cannot be PETSC_DECIDE)
4157 .  n - number of local columns; this should be the same as the local size used in creating the
4158        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4159        calculated if N is given). For square matrices n is almost always m.
4160 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4161 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4162 .  i - row indices for the "diagonal" portion of the matrix
4163 .  j - column indices for the "diagonal" portion of the matrix
4164 .  a - matrix values for the "diagonal" portion of the matrix
4165 .  oi - row indices for the "off-diagonal" portion of the matrix
4166 .  oj - column indices for the "off-diagonal" portion of the matrix
4167 -  oa - matrix values for the "off-diagonal" portion of the matrix
4168 
4169    Output Parameter:
4170 .   mat - the matrix
4171 
4172    Level: advanced
4173 
4174    Notes:
4175        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc; they must remain valid for the lifetime of the matrix.
4176 
4177        The i, j, oi, and oj indices are zero based. The j column indices are local to the "diagonal" block (0 to n-1), while the oj column indices are global (0 to N-1).
4178 
4179        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4180 
4181 
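   Example usage (a sketch only; the split CSR arrays i,j,a and oi,oj,oa are assumed to have been
   built by the caller as described above):
.vb
   Mat A;
   ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                         i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
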
4182 .keywords: matrix, aij, compressed row, sparse, parallel
4183 
4184 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4185           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4186 @*/
4187 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4188 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4189 {
4190   PetscErrorCode ierr;
4191   Mat_MPIAIJ     *maij;
4192 
4193   PetscFunctionBegin;
4194   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
4195   if (i[0]) {
4196     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4197   }
4198   if (oi[0]) {
4199     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4200   }
4201   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4202   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4203   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4204   maij = (Mat_MPIAIJ*) (*mat)->data;
4205   maij->donotstash     = PETSC_TRUE;
4206   (*mat)->preallocated = PETSC_TRUE;
4207 
4208   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4209   ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr);
4210   ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr);
4211 
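  /* wrap the caller's CSR arrays (no values are copied) as the local "diagonal" block A and "off-diagonal" block B */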
4212   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4213   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4214 
4215   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4216   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4217   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4218   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4219 
4220   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4221   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4222   PetscFunctionReturn(0);
4223 }
4224 
4225 /*
4226     Special version for direct calls from Fortran
4227 */
4228 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4229 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4230 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4231 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4232 #endif
4233 
4234 /* Change these macros so they can be used in a void function */
4235 #undef CHKERRQ
4236 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4237 #undef SETERRQ2
4238 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4239 #undef SETERRQ
4240 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4241 
4242 EXTERN_C_BEGIN
4243 #undef __FUNCT__
4244 #define __FUNCT__ "matsetvaluesmpiaij_"
4245 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4246 {
4247   Mat            mat = *mmat;
4248   PetscInt       m = *mm, n = *mn;
4249   InsertMode     addv = *maddv;
4250   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4251   PetscScalar    value;
4252   PetscErrorCode ierr;
4253 
4254   MatPreallocated(mat);
4255   if (mat->insertmode == NOT_SET_VALUES) {
4256     mat->insertmode = addv;
4257   }
4258 #if defined(PETSC_USE_DEBUG)
4259   else if (mat->insertmode != addv) {
4260     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4261   }
4262 #endif
4263   {
4264   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4265   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4266   PetscTruth     roworiented = aij->roworiented;
4267 
4268   /* Some variables required by the MatSetValues_SeqAIJ_A/B_Private() macros */
4269   Mat            A = aij->A;
4270   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4271   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4272   PetscScalar    *aa = a->a;
4273   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4274   Mat            B = aij->B;
4275   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4276   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4277   PetscScalar    *ba = b->a;
4278 
4279   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4280   PetscInt       nonew = a->nonew;
4281   PetscScalar    *ap1,*ap2;
4282 
4283   PetscFunctionBegin;
4284   for (i=0; i<m; i++) {
4285     if (im[i] < 0) continue;
4286 #if defined(PETSC_USE_DEBUG)
4287     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4288 #endif
4289     if (im[i] >= rstart && im[i] < rend) {
4290       row      = im[i] - rstart;
4291       lastcol1 = -1;
4292       rp1      = aj + ai[row];
4293       ap1      = aa + ai[row];
4294       rmax1    = aimax[row];
4295       nrow1    = ailen[row];
4296       low1     = 0;
4297       high1    = nrow1;
4298       lastcol2 = -1;
4299       rp2      = bj + bi[row];
4300       ap2      = ba + bi[row];
4301       rmax2    = bimax[row];
4302       nrow2    = bilen[row];
4303       low2     = 0;
4304       high2    = nrow2;
4305 
4306       for (j=0; j<n; j++) {
4307         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4308         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4309         if (in[j] >= cstart && in[j] < cend){
4310           col = in[j] - cstart;
4311           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4312         } else if (in[j] < 0) continue;
4313 #if defined(PETSC_USE_DEBUG)
4314         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4315 #endif
4316         else {
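          /* the column is off-process: if the matrix was assembled before, map the global column index to
             B's compacted local numbering through colmap; if the column is not present yet and new nonzeros
             are allowed, disassemble B back to global column numbering before inserting */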
4317           if (mat->was_assembled) {
4318             if (!aij->colmap) {
4319               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4320             }
4321 #if defined (PETSC_USE_CTABLE)
4322             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4323             col--;
4324 #else
4325             col = aij->colmap[in[j]] - 1;
4326 #endif
4327             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4328               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4329               col =  in[j];
4330               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4331               B = aij->B;
4332               b = (Mat_SeqAIJ*)B->data;
4333               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4334               ba       = b->a;   /* B was rebuilt by DisAssemble_MPIAIJ(), so refresh ba before using it */
4335               rp2      = bj + bi[row];
4336               ap2      = ba + bi[row];
4337               rmax2    = bimax[row];
4338               nrow2    = bilen[row];
4339               low2     = 0;
4340               high2    = nrow2;
4341               bm       = aij->B->rmap.n;
4342             }
4343           } else col = in[j];
4344           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4345         }
4346       }
4347     } else {
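      /* the row belongs to another process: stash the values; they are communicated during MatAssemblyBegin()/MatAssemblyEnd() */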
4348       if (!aij->donotstash) {
4349         if (roworiented) {
4350           if (ignorezeroentries && v[i*n] == 0.0) continue;
4351           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4352         } else {
4353           if (ignorezeroentries && v[i] == 0.0) continue;
4354           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4355         }
4356       }
4357     }
4358   }}
4359   PetscFunctionReturnVoid();
4360 }
4361 EXTERN_C_END
4362 
4363