xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision da668acc18c7a9333d158e0ba9cea6f118940dd2)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it it is not scalable (each processor
11 has an order N integer array but is fast to acess.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatGetValues_MPIAIJ"
240 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
241 {
242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
243   PetscErrorCode ierr;
244   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
245   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
246 
247   PetscFunctionBegin;
248   for (i=0; i<m; i++) {
249     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
250     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
251     if (idxm[i] >= rstart && idxm[i] < rend) {
252       row = idxm[i] - rstart;
253       for (j=0; j<n; j++) {
254         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
255         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
256         if (idxn[j] >= cstart && idxn[j] < cend){
257           col = idxn[j] - cstart;
258           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
259         } else {
260           if (!aij->colmap) {
261             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
262           }
263 #if defined (PETSC_USE_CTABLE)
264           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
265           col --;
266 #else
267           col = aij->colmap[idxn[j]] - 1;
268 #endif
269           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
270           else {
271             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
272           }
273         }
274       }
275     } else {
276       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
277     }
278   }
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
284 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       nstash,reallocs;
289   InsertMode     addv;
290 
291   PetscFunctionBegin;
292   if (aij->donotstash) {
293     PetscFunctionReturn(0);
294   }
295 
296   /* make sure all processors are either in INSERTMODE or ADDMODE */
297   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
298   if (addv == (ADD_VALUES|INSERT_VALUES)) {
299     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
300   }
301   mat->insertmode = addv; /* in case this processor had no cache */
302 
303   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
304   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
305   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
306   PetscFunctionReturn(0);
307 }
308 
309 #undef __FUNCT__
310 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
311 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
312 {
313   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
314   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
315   PetscErrorCode ierr;
316   PetscMPIInt    n;
317   PetscInt       i,j,rstart,ncols,flg;
318   PetscInt       *row,*col,other_disassembled;
319   PetscScalar    *val;
320   InsertMode     addv = mat->insertmode;
321 
322   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
323   PetscFunctionBegin;
324   if (!aij->donotstash) {
325     while (1) {
326       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
327       if (!flg) break;
328 
329       for (i=0; i<n;) {
330         /* Now identify the consecutive vals belonging to the same row */
331         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332         if (j < n) ncols = j-i;
333         else       ncols = n-i;
334         /* Now assemble all these values with a single function call */
335         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
336         i = j;
337       }
338     }
339     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
340   }
341   a->compressedrow.use     = PETSC_FALSE;
342   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
344 
345   /* determine if any processor has disassembled, if so we must
346      also disassemble ourselfs, in order that we may reassemble. */
347   /*
348      if nonzero structure of submatrix B cannot change then we know that
349      no processor disassembled thus we can skip this stuff
350   */
351   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
352     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
353     if (mat->was_assembled && !other_disassembled) {
354       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
355     }
356   }
357   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
358     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
359   }
360   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
361   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
362   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
363   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
364 
365   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
366   aij->rowvalues = 0;
367 
368   /* used by MatAXPY() */
369   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
370   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
371 
372   PetscFunctionReturn(0);
373 }
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
377 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
378 {
379   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
380   PetscErrorCode ierr;
381 
382   PetscFunctionBegin;
383   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
384   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
385   PetscFunctionReturn(0);
386 }
387 
388 #undef __FUNCT__
389 #define __FUNCT__ "MatZeroRows_MPIAIJ"
390 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
391 {
392   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
393   PetscErrorCode ierr;
394   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
395   PetscInt       i,*owners = A->rmap.range;
396   PetscInt       *nprocs,j,idx,nsends,row;
397   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
398   PetscInt       *rvalues,count,base,slen,*source;
399   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
400   MPI_Comm       comm = A->comm;
401   MPI_Request    *send_waits,*recv_waits;
402   MPI_Status     recv_status,*send_status;
403 #if defined(PETSC_DEBUG)
404   PetscTruth     found = PETSC_FALSE;
405 #endif
406 
407   PetscFunctionBegin;
408   /*  first count number of contributors to each processor */
409   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
410   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
411   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
412   j = 0;
413   for (i=0; i<N; i++) {
414     if (lastidx > (idx = rows[i])) j = 0;
415     lastidx = idx;
416     for (; j<size; j++) {
417       if (idx >= owners[j] && idx < owners[j+1]) {
418         nprocs[2*j]++;
419         nprocs[2*j+1] = 1;
420         owner[i] = j;
421 #if defined(PETSC_DEBUG)
422         found = PETSC_TRUE;
423 #endif
424         break;
425       }
426     }
427 #if defined(PETSC_DEBUG)
428     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
429     found = PETSC_FALSE;
430 #endif
431   }
432   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
433 
434   /* inform other processors of number of messages and max length*/
435   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
436 
437   /* post receives:   */
438   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
439   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
440   for (i=0; i<nrecvs; i++) {
441     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
442   }
443 
444   /* do sends:
445       1) starts[i] gives the starting index in svalues for stuff going to
446          the ith processor
447   */
448   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
449   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
450   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
451   starts[0] = 0;
452   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
453   for (i=0; i<N; i++) {
454     svalues[starts[owner[i]]++] = rows[i];
455   }
456 
457   starts[0] = 0;
458   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
459   count = 0;
460   for (i=0; i<size; i++) {
461     if (nprocs[2*i+1]) {
462       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
463     }
464   }
465   ierr = PetscFree(starts);CHKERRQ(ierr);
466 
467   base = owners[rank];
468 
469   /*  wait on receives */
470   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
471   source = lens + nrecvs;
472   count  = nrecvs; slen = 0;
473   while (count) {
474     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
475     /* unpack receives into our local space */
476     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
477     source[imdex]  = recv_status.MPI_SOURCE;
478     lens[imdex]    = n;
479     slen          += n;
480     count--;
481   }
482   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
483 
484   /* move the data into the send scatter */
485   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
486   count = 0;
487   for (i=0; i<nrecvs; i++) {
488     values = rvalues + i*nmax;
489     for (j=0; j<lens[i]; j++) {
490       lrows[count++] = values[j] - base;
491     }
492   }
493   ierr = PetscFree(rvalues);CHKERRQ(ierr);
494   ierr = PetscFree(lens);CHKERRQ(ierr);
495   ierr = PetscFree(owner);CHKERRQ(ierr);
496   ierr = PetscFree(nprocs);CHKERRQ(ierr);
497 
498   /* actually zap the local rows */
499   /*
500         Zero the required rows. If the "diagonal block" of the matrix
501      is square and the user wishes to set the diagonal we use separate
502      code so that MatSetValues() is not called for each diagonal allocating
503      new memory, thus calling lots of mallocs and slowing things down.
504 
505        Contributed by: Matthew Knepley
506   */
507   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
508   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
509   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
510     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
511   } else if (diag != 0.0) {
512     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
513     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
514       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
515 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
516     }
517     for (i = 0; i < slen; i++) {
518       row  = lrows[i] + rstart;
519       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
520     }
521     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
522     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523   } else {
524     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
525   }
526   ierr = PetscFree(lrows);CHKERRQ(ierr);
527 
528   /* wait on sends */
529   if (nsends) {
530     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
531     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
532     ierr = PetscFree(send_status);CHKERRQ(ierr);
533   }
534   ierr = PetscFree(send_waits);CHKERRQ(ierr);
535   ierr = PetscFree(svalues);CHKERRQ(ierr);
536 
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMult_MPIAIJ"
542 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
543 {
544   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
545   PetscErrorCode ierr;
546   PetscInt       nt;
547 
548   PetscFunctionBegin;
549   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
550   if (nt != A->cmap.n) {
551     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
552   }
553   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
554   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
555   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
556   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatMultAdd_MPIAIJ"
562 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
563 {
564   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
565   PetscErrorCode ierr;
566 
567   PetscFunctionBegin;
568   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
569   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
570   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
571   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
577 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
578 {
579   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
580   PetscErrorCode ierr;
581   PetscTruth     merged;
582 
583   PetscFunctionBegin;
584   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
585   /* do nondiagonal part */
586   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
587   if (!merged) {
588     /* send it on its way */
589     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
590     /* do local part */
591     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
592     /* receive remote parts: note this assumes the values are not actually */
593     /* added in yy until the next line, */
594     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
595   } else {
596     /* do local part */
597     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
598     /* send it on its way */
599     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
600     /* values actually were received in the Begin() but we need to call this nop */
601     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
602   }
603   PetscFunctionReturn(0);
604 }
605 
606 EXTERN_C_BEGIN
607 #undef __FUNCT__
608 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
609 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
610 {
611   MPI_Comm       comm;
612   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
613   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
614   IS             Me,Notme;
615   PetscErrorCode ierr;
616   PetscInt       M,N,first,last,*notme,i;
617   PetscMPIInt    size;
618 
619   PetscFunctionBegin;
620 
621   /* Easy test: symmetric diagonal block */
622   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
623   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
624   if (!*f) PetscFunctionReturn(0);
625   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
626   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
627   if (size == 1) PetscFunctionReturn(0);
628 
629   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
630   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
631   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
632   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
633   for (i=0; i<first; i++) notme[i] = i;
634   for (i=last; i<M; i++) notme[i-last+first] = i;
635   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
636   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
637   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
638   Aoff = Aoffs[0];
639   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
640   Boff = Boffs[0];
641   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
642   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
644   ierr = ISDestroy(Me);CHKERRQ(ierr);
645   ierr = ISDestroy(Notme);CHKERRQ(ierr);
646 
647   PetscFunctionReturn(0);
648 }
649 EXTERN_C_END
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
653 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
654 {
655   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
656   PetscErrorCode ierr;
657 
658   PetscFunctionBegin;
659   /* do nondiagonal part */
660   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
661   /* send it on its way */
662   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
663   /* do local part */
664   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
665   /* receive remote parts */
666   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
667   PetscFunctionReturn(0);
668 }
669 
670 /*
671   This only works correctly for square matrices where the subblock A->A is the
672    diagonal block
673 */
674 #undef __FUNCT__
675 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
676 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
677 {
678   PetscErrorCode ierr;
679   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
680 
681   PetscFunctionBegin;
682   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
683   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
684     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
685   }
686   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatScale_MPIAIJ"
692 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
693 {
694   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
695   PetscErrorCode ierr;
696 
697   PetscFunctionBegin;
698   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
699   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatDestroy_MPIAIJ"
705 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
706 {
707   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
708   PetscErrorCode ierr;
709 
710   PetscFunctionBegin;
711 #if defined(PETSC_USE_LOG)
712   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
713 #endif
714   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
715   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
717 #if defined (PETSC_USE_CTABLE)
718   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
719 #else
720   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
721 #endif
722   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
723   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
724   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
725   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
726   ierr = PetscFree(aij);CHKERRQ(ierr);
727 
728   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
729   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatView_MPIAIJ_Binary"
741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
742 {
743   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
744   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
745   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
746   PetscErrorCode    ierr;
747   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
748   int               fd;
749   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
750   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
751   PetscScalar       *column_values;
752 
753   PetscFunctionBegin;
754   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
755   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
756   nz   = A->nz + B->nz;
757   if (!rank) {
758     header[0] = MAT_FILE_COOKIE;
759     header[1] = mat->rmap.N;
760     header[2] = mat->cmap.N;
761     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
762     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
763     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
764     /* get largest number of rows any processor has */
765     rlen = mat->rmap.n;
766     range = mat->rmap.range;
767     for (i=1; i<size; i++) {
768       rlen = PetscMax(rlen,range[i+1] - range[i]);
769     }
770   } else {
771     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
772     rlen = mat->rmap.n;
773   }
774 
775   /* load up the local row counts */
776   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
777   for (i=0; i<mat->rmap.n; i++) {
778     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
779   }
780 
781   /* store the row lengths to the file */
782   if (!rank) {
783     MPI_Status status;
784     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
785     for (i=1; i<size; i++) {
786       rlen = range[i+1] - range[i];
787       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
788       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
789     }
790   } else {
791     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
792   }
793   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
794 
795   /* load up the local column indices */
796   nzmax = nz; /* )th processor needs space a largest processor needs */
797   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
798   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
799   cnt  = 0;
800   for (i=0; i<mat->rmap.n; i++) {
801     for (j=B->i[i]; j<B->i[i+1]; j++) {
802       if ( (col = garray[B->j[j]]) > cstart) break;
803       column_indices[cnt++] = col;
804     }
805     for (k=A->i[i]; k<A->i[i+1]; k++) {
806       column_indices[cnt++] = A->j[k] + cstart;
807     }
808     for (; j<B->i[i+1]; j++) {
809       column_indices[cnt++] = garray[B->j[j]];
810     }
811   }
812   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
813 
814   /* store the column indices to the file */
815   if (!rank) {
816     MPI_Status status;
817     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
818     for (i=1; i<size; i++) {
819       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
820       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
821       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
823     }
824   } else {
825     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
826     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827   }
828   ierr = PetscFree(column_indices);CHKERRQ(ierr);
829 
830   /* load up the local column values */
831   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
832   cnt  = 0;
833   for (i=0; i<mat->rmap.n; i++) {
834     for (j=B->i[i]; j<B->i[i+1]; j++) {
835       if ( garray[B->j[j]] > cstart) break;
836       column_values[cnt++] = B->a[j];
837     }
838     for (k=A->i[i]; k<A->i[i+1]; k++) {
839       column_values[cnt++] = A->a[k];
840     }
841     for (; j<B->i[i+1]; j++) {
842       column_values[cnt++] = B->a[j];
843     }
844   }
845   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
846 
847   /* store the column values to the file */
848   if (!rank) {
849     MPI_Status status;
850     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
851     for (i=1; i<size; i++) {
852       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
853       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
854       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
856     }
857   } else {
858     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
859     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
860   }
861   ierr = PetscFree(column_values);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
868 {
869   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
870   PetscErrorCode    ierr;
871   PetscMPIInt       rank = aij->rank,size = aij->size;
872   PetscTruth        isdraw,iascii,isbinary;
873   PetscViewer       sviewer;
874   PetscViewerFormat format;
875 
876   PetscFunctionBegin;
877   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
880   if (iascii) {
881     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
882     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
883       MatInfo    info;
884       PetscTruth inodes;
885 
886       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
889       if (!inodes) {
890         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
891 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
892       } else {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
894 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
895       }
896       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
897       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
898       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
900       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
901       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
902       PetscFunctionReturn(0);
903     } else if (format == PETSC_VIEWER_ASCII_INFO) {
904       PetscInt   inodecount,inodelimit,*inodes;
905       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
906       if (inodes) {
907         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
908       } else {
909         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
910       }
911       PetscFunctionReturn(0);
912     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
913       PetscFunctionReturn(0);
914     }
915   } else if (isbinary) {
916     if (size == 1) {
917       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
918       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
919     } else {
920       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
921     }
922     PetscFunctionReturn(0);
923   } else if (isdraw) {
924     PetscDraw  draw;
925     PetscTruth isnull;
926     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
927     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
928   }
929 
930   if (size == 1) {
931     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
932     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
933   } else {
934     /* assemble the entire matrix onto first processor. */
935     Mat         A;
936     Mat_SeqAIJ  *Aloc;
937     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
938     PetscScalar *a;
939 
940     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
941     if (!rank) {
942       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
943     } else {
944       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
945     }
946     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
947     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
948     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
949     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
950 
951     /* copy over the A part */
952     Aloc = (Mat_SeqAIJ*)aij->A->data;
953     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
954     row = mat->rmap.rstart;
955     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
956     for (i=0; i<m; i++) {
957       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
958       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
959     }
960     aj = Aloc->j;
961     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
962 
963     /* copy over the B part */
964     Aloc = (Mat_SeqAIJ*)aij->B->data;
965     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
966     row  = mat->rmap.rstart;
967     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
968     ct   = cols;
969     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
973     }
974     ierr = PetscFree(ct);CHKERRQ(ierr);
975     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     /*
978        Everyone has to call to draw the matrix since the graphics waits are
979        synchronized across all processors that share the PetscDraw object
980     */
981     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
982     if (!rank) {
983       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
984       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
985     }
986     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     ierr = MatDestroy(A);CHKERRQ(ierr);
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatView_MPIAIJ"
994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
995 {
996   PetscErrorCode ierr;
997   PetscTruth     iascii,isdraw,issocket,isbinary;
998 
999   PetscFunctionBegin;
1000   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1002   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1004   if (iascii || isdraw || isbinary || issocket) {
1005     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1006   } else {
1007     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatRelax_MPIAIJ"
1014 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1015 {
1016   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1017   PetscErrorCode ierr;
1018   Vec            bb1;
1019 
1020   PetscFunctionBegin;
1021   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1022 
1023   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1024 
1025   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1026     if (flag & SOR_ZERO_INITIAL_GUESS) {
1027       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1028       its--;
1029     }
1030 
1031     while (its--) {
1032       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1033       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1034 
1035       /* update rhs: bb1 = bb - B*x */
1036       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1037       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1038 
1039       /* local sweep */
1040       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1041       CHKERRQ(ierr);
1042     }
1043   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1044     if (flag & SOR_ZERO_INITIAL_GUESS) {
1045       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1046       its--;
1047     }
1048     while (its--) {
1049       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1050       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1051 
1052       /* update rhs: bb1 = bb - B*x */
1053       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1054       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1055 
1056       /* local sweep */
1057       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1058       CHKERRQ(ierr);
1059     }
1060   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1061     if (flag & SOR_ZERO_INITIAL_GUESS) {
1062       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1063       its--;
1064     }
1065     while (its--) {
1066       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1067       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1068 
1069       /* update rhs: bb1 = bb - B*x */
1070       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1071       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1072 
1073       /* local sweep */
1074       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1075       CHKERRQ(ierr);
1076     }
1077   } else {
1078     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1079   }
1080 
1081   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1082   PetscFunctionReturn(0);
1083 }
1084 
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatPermute_MPIAIJ"
1087 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1088 {
1089   MPI_Comm       comm,pcomm;
1090   PetscInt       first,local_size,nrows,*rows;
1091   int            ntids;
1092   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1093   PetscErrorCode ierr;
1094 
1095   PetscFunctionBegin;
1096   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1097   /* make a collective version of 'rowp' */
1098   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1099   if (pcomm==comm) {
1100     crowp = rowp;
1101   } else {
1102     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1103     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1104     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1105     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1106   }
1107   /* collect the global row permutation and invert it */
1108   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1109   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1110   if (pcomm!=comm) {
1111     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1112   }
1113   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1114   /* get the local target indices */
1115   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1116   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1117   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1118   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1119   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1121   /* the column permutation is so much easier;
1122      make a local version of 'colp' and invert it */
1123   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1124   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1125   if (ntids==1) {
1126     lcolp = colp;
1127   } else {
1128     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1129     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1130     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1131   }
1132   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1133   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1134   if (ntids>1) {
1135     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1136     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1137   }
1138   /* now we just get the submatrix */
1139   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1140   /* clean up */
1141   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1142   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1148 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1149 {
1150   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1151   Mat            A = mat->A,B = mat->B;
1152   PetscErrorCode ierr;
1153   PetscReal      isend[5],irecv[5];
1154 
1155   PetscFunctionBegin;
1156   info->block_size     = 1.0;
1157   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1158   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1159   isend[3] = info->memory;  isend[4] = info->mallocs;
1160   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1161   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1162   isend[3] += info->memory;  isend[4] += info->mallocs;
1163   if (flag == MAT_LOCAL) {
1164     info->nz_used      = isend[0];
1165     info->nz_allocated = isend[1];
1166     info->nz_unneeded  = isend[2];
1167     info->memory       = isend[3];
1168     info->mallocs      = isend[4];
1169   } else if (flag == MAT_GLOBAL_MAX) {
1170     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1171     info->nz_used      = irecv[0];
1172     info->nz_allocated = irecv[1];
1173     info->nz_unneeded  = irecv[2];
1174     info->memory       = irecv[3];
1175     info->mallocs      = irecv[4];
1176   } else if (flag == MAT_GLOBAL_SUM) {
1177     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1178     info->nz_used      = irecv[0];
1179     info->nz_allocated = irecv[1];
1180     info->nz_unneeded  = irecv[2];
1181     info->memory       = irecv[3];
1182     info->mallocs      = irecv[4];
1183   }
1184   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1185   info->fill_ratio_needed = 0;
1186   info->factor_mallocs    = 0;
1187   info->rows_global       = (double)matin->rmap.N;
1188   info->columns_global    = (double)matin->cmap.N;
1189   info->rows_local        = (double)matin->rmap.n;
1190   info->columns_local     = (double)matin->cmap.N;
1191 
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatSetOption_MPIAIJ"
1197 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1198 {
1199   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   switch (op) {
1204   case MAT_NO_NEW_NONZERO_LOCATIONS:
1205   case MAT_YES_NEW_NONZERO_LOCATIONS:
1206   case MAT_COLUMNS_UNSORTED:
1207   case MAT_COLUMNS_SORTED:
1208   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1209   case MAT_KEEP_ZEROED_ROWS:
1210   case MAT_NEW_NONZERO_LOCATION_ERR:
1211   case MAT_USE_INODES:
1212   case MAT_DO_NOT_USE_INODES:
1213   case MAT_IGNORE_ZERO_ENTRIES:
1214     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1215     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1216     break;
1217   case MAT_ROW_ORIENTED:
1218     a->roworiented = PETSC_TRUE;
1219     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1220     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1221     break;
1222   case MAT_ROWS_SORTED:
1223   case MAT_ROWS_UNSORTED:
1224   case MAT_YES_NEW_DIAGONALS:
1225     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1226     break;
1227   case MAT_COLUMN_ORIENTED:
1228     a->roworiented = PETSC_FALSE;
1229     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1230     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1231     break;
1232   case MAT_IGNORE_OFF_PROC_ENTRIES:
1233     a->donotstash = PETSC_TRUE;
1234     break;
1235   case MAT_NO_NEW_DIAGONALS:
1236     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1237   case MAT_SYMMETRIC:
1238     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1239     break;
1240   case MAT_STRUCTURALLY_SYMMETRIC:
1241   case MAT_HERMITIAN:
1242   case MAT_SYMMETRY_ETERNAL:
1243     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1244     break;
1245   case MAT_NOT_SYMMETRIC:
1246   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1247   case MAT_NOT_HERMITIAN:
1248   case MAT_NOT_SYMMETRY_ETERNAL:
1249     break;
1250   default:
1251     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1252   }
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatGetRow_MPIAIJ"
1258 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1259 {
1260   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1261   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1262   PetscErrorCode ierr;
1263   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1264   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1265   PetscInt       *cmap,*idx_p;
1266 
1267   PetscFunctionBegin;
1268   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1269   mat->getrowactive = PETSC_TRUE;
1270 
1271   if (!mat->rowvalues && (idx || v)) {
1272     /*
1273         allocate enough space to hold information from the longest row.
1274     */
1275     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1276     PetscInt     max = 1,tmp;
1277     for (i=0; i<matin->rmap.n; i++) {
1278       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1279       if (max < tmp) { max = tmp; }
1280     }
1281     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1282     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1283   }
1284 
1285   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1286   lrow = row - rstart;
1287 
1288   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1289   if (!v)   {pvA = 0; pvB = 0;}
1290   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1291   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1292   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1293   nztot = nzA + nzB;
1294 
1295   cmap  = mat->garray;
1296   if (v  || idx) {
1297     if (nztot) {
1298       /* Sort by increasing column numbers, assuming A and B already sorted */
1299       PetscInt imark = -1;
1300       if (v) {
1301         *v = v_p = mat->rowvalues;
1302         for (i=0; i<nzB; i++) {
1303           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1304           else break;
1305         }
1306         imark = i;
1307         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1308         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1309       }
1310       if (idx) {
1311         *idx = idx_p = mat->rowindices;
1312         if (imark > -1) {
1313           for (i=0; i<imark; i++) {
1314             idx_p[i] = cmap[cworkB[i]];
1315           }
1316         } else {
1317           for (i=0; i<nzB; i++) {
1318             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1319             else break;
1320           }
1321           imark = i;
1322         }
1323         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1324         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1325       }
1326     } else {
1327       if (idx) *idx = 0;
1328       if (v)   *v   = 0;
1329     }
1330   }
1331   *nz = nztot;
1332   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1333   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1339 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1340 {
1341   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1342 
1343   PetscFunctionBegin;
1344   if (!aij->getrowactive) {
1345     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1346   }
1347   aij->getrowactive = PETSC_FALSE;
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatNorm_MPIAIJ"
1353 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1354 {
1355   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1356   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1357   PetscErrorCode ierr;
1358   PetscInt       i,j,cstart = mat->cmap.rstart;
1359   PetscReal      sum = 0.0;
1360   PetscScalar    *v;
1361 
1362   PetscFunctionBegin;
1363   if (aij->size == 1) {
1364     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1365   } else {
1366     if (type == NORM_FROBENIUS) {
1367       v = amat->a;
1368       for (i=0; i<amat->nz; i++) {
1369 #if defined(PETSC_USE_COMPLEX)
1370         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1371 #else
1372         sum += (*v)*(*v); v++;
1373 #endif
1374       }
1375       v = bmat->a;
1376       for (i=0; i<bmat->nz; i++) {
1377 #if defined(PETSC_USE_COMPLEX)
1378         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1379 #else
1380         sum += (*v)*(*v); v++;
1381 #endif
1382       }
1383       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1384       *norm = sqrt(*norm);
1385     } else if (type == NORM_1) { /* max column norm */
1386       PetscReal *tmp,*tmp2;
1387       PetscInt    *jj,*garray = aij->garray;
1388       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1389       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1390       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1391       *norm = 0.0;
1392       v = amat->a; jj = amat->j;
1393       for (j=0; j<amat->nz; j++) {
1394         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1395       }
1396       v = bmat->a; jj = bmat->j;
1397       for (j=0; j<bmat->nz; j++) {
1398         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1399       }
1400       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1401       for (j=0; j<mat->cmap.N; j++) {
1402         if (tmp2[j] > *norm) *norm = tmp2[j];
1403       }
1404       ierr = PetscFree(tmp);CHKERRQ(ierr);
1405       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1406     } else if (type == NORM_INFINITY) { /* max row norm */
1407       PetscReal ntemp = 0.0;
1408       for (j=0; j<aij->A->rmap.n; j++) {
1409         v = amat->a + amat->i[j];
1410         sum = 0.0;
1411         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1412           sum += PetscAbsScalar(*v); v++;
1413         }
1414         v = bmat->a + bmat->i[j];
1415         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1416           sum += PetscAbsScalar(*v); v++;
1417         }
1418         if (sum > ntemp) ntemp = sum;
1419       }
1420       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1421     } else {
1422       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1423     }
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatTranspose_MPIAIJ"
1430 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1431 {
1432   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1433   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1434   PetscErrorCode ierr;
1435   PetscInt       M = A->rmap.N,N = A->cmap.N,ma,na,mb,*ai,*aj,*bi,*bj,row,*cols,i,*d_nnz;
1436   PetscInt       cstart=A->cmap.rstart,ncol;
1437   Mat            B;
1438   PetscScalar    *array;
1439 
1440   PetscFunctionBegin;
1441   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1442 
1443   /* compute d_nnz for preallocation; o_nnz is approximated by d_nnz to avoid communication */
1444   ma = A->rmap.n; na = A->cmap.n; mb = a->B->rmap.n;
1445   ai = Aloc->i; aj = Aloc->j;
1446   bi = Bloc->i; bj = Bloc->j;
1447   ierr = PetscMalloc((1+na+bi[mb])*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1448   cols = d_nnz + na + 1; /* work space to be used by B part */
1449   ierr = PetscMemzero(d_nnz,(1+na)*sizeof(PetscInt));CHKERRQ(ierr);
1450   for (i=0; i<ai[ma]; i++){
1451     d_nnz[aj[i]] ++;
1452     aj[i] += cstart; /* global col index to be used by MatSetValues() */
1453   }
1454 
1455   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1456   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1457   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1458   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,d_nnz);CHKERRQ(ierr);
1459 
1460   /* copy over the A part */
1461   array = Aloc->a;
1462   row = A->rmap.rstart;
1463   for (i=0; i<ma; i++) {
1464     ncol = ai[i+1]-ai[i];
1465     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1466     row++; array += ncol; aj += ncol;
1467   }
1468   aj = Aloc->j;
1469   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1470 
1471   /* copy over the B part */
1472   array = Bloc->a;
1473   row = A->rmap.rstart;
1474   for (i=0; i<bi[mb]; i++) {cols[i] = a->garray[bj[i]];}
1475   for (i=0; i<mb; i++) {
1476     ncol = bi[i+1]-bi[i];
1477     ierr = MatSetValues(B,ncol,cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1478     row++; array += ncol; cols += ncol;
1479   }
1480   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1481   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1482   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1483   if (matout) {
1484     *matout = B;
1485   } else {
1486     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1487   }
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1493 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1494 {
1495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1496   Mat            a = aij->A,b = aij->B;
1497   PetscErrorCode ierr;
1498   PetscInt       s1,s2,s3;
1499 
1500   PetscFunctionBegin;
1501   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1502   if (rr) {
1503     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1504     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1505     /* Overlap communication with computation. */
1506     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1507   }
1508   if (ll) {
1509     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1510     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1511     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1512   }
1513   /* scale  the diagonal block */
1514   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1515 
1516   if (rr) {
1517     /* Do a scatter end and then right scale the off-diagonal block */
1518     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1519     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1520   }
1521 
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1527 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1528 {
1529   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1530   PetscErrorCode ierr;
1531 
1532   PetscFunctionBegin;
1533   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1534   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1535   PetscFunctionReturn(0);
1536 }
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1539 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1540 {
1541   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1542   PetscErrorCode ierr;
1543 
1544   PetscFunctionBegin;
1545   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatEqual_MPIAIJ"
1551 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1552 {
1553   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1554   Mat            a,b,c,d;
1555   PetscTruth     flg;
1556   PetscErrorCode ierr;
1557 
1558   PetscFunctionBegin;
1559   a = matA->A; b = matA->B;
1560   c = matB->A; d = matB->B;
1561 
1562   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1563   if (flg) {
1564     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1565   }
1566   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatCopy_MPIAIJ"
1572 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1573 {
1574   PetscErrorCode ierr;
1575   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1576   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1577 
1578   PetscFunctionBegin;
1579   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1580   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1581     /* because of the column compression in the off-processor part of the matrix a->B,
1582        the number of columns in a->B and b->B may be different, hence we cannot call
1583        the MatCopy() directly on the two parts. If need be, we can provide a more
1584        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1585        then copying the submatrices */
1586     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1587   } else {
1588     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1589     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1590   }
1591   PetscFunctionReturn(0);
1592 }
1593 
1594 #undef __FUNCT__
1595 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1596 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1597 {
1598   PetscErrorCode ierr;
1599 
1600   PetscFunctionBegin;
1601   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 #include "petscblaslapack.h"
1606 #undef __FUNCT__
1607 #define __FUNCT__ "MatAXPY_MPIAIJ"
1608 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1609 {
1610   PetscErrorCode ierr;
1611   PetscInt       i;
1612   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1613   PetscBLASInt   bnz,one=1;
1614   Mat_SeqAIJ     *x,*y;
1615 
1616   PetscFunctionBegin;
1617   if (str == SAME_NONZERO_PATTERN) {
1618     PetscScalar alpha = a;
1619     x = (Mat_SeqAIJ *)xx->A->data;
1620     y = (Mat_SeqAIJ *)yy->A->data;
1621     bnz = (PetscBLASInt)x->nz;
1622     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1623     x = (Mat_SeqAIJ *)xx->B->data;
1624     y = (Mat_SeqAIJ *)yy->B->data;
1625     bnz = (PetscBLASInt)x->nz;
1626     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1627   } else if (str == SUBSET_NONZERO_PATTERN) {
1628     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1629 
1630     x = (Mat_SeqAIJ *)xx->B->data;
1631     y = (Mat_SeqAIJ *)yy->B->data;
1632     if (y->xtoy && y->XtoY != xx->B) {
1633       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1634       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1635     }
1636     if (!y->xtoy) { /* get xtoy */
1637       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1638       y->XtoY = xx->B;
1639       ierr = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
1640     }
1641     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1642   } else {
1643     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1644   }
1645   PetscFunctionReturn(0);
1646 }
1647 
1648 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1649 
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatConjugate_MPIAIJ"
1652 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1653 {
1654 #if defined(PETSC_USE_COMPLEX)
1655   PetscErrorCode ierr;
1656   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1657 
1658   PetscFunctionBegin;
1659   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1660   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1661 #else
1662   PetscFunctionBegin;
1663 #endif
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatRealPart_MPIAIJ"
1669 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1670 {
1671   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1672   PetscErrorCode ierr;
1673 
1674   PetscFunctionBegin;
1675   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1676   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 #undef __FUNCT__
1681 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1682 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1683 {
1684   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1685   PetscErrorCode ierr;
1686 
1687   PetscFunctionBegin;
1688   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1689   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1690   PetscFunctionReturn(0);
1691 }
1692 
1693 #ifdef PETSC_HAVE_PBGL
1694 
1695 #include <boost/parallel/mpi/bsp_process_group.hpp>
1696 #include <boost/graph/distributed/ilu_default_graph.hpp>
1697 #include <boost/graph/distributed/ilu_0_block.hpp>
1698 #include <boost/graph/distributed/ilu_preconditioner.hpp>
1699 #include <boost/graph/distributed/petsc/interface.hpp>
1700 #include <boost/multi_array.hpp>
1701 #include <boost/parallel/distributed_property_map.hpp>
1702 
1703 #undef __FUNCT__
1704 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
1705 /*
1706   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1707 */
1708 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat A, IS isrow, IS iscol, MatFactorInfo *info, Mat *fact)
1709 {
1710   namespace petsc = boost::distributed::petsc;
1711 
1712   namespace graph_dist = boost::graph::distributed;
1713   using boost::graph::distributed::ilu_default::process_group_type;
1714   using boost::graph::ilu_permuted;
1715 
1716   PetscTruth      row_identity, col_identity;
1717   PetscContainer  c;
1718   PetscInt        m, n, M, N;
1719   PetscErrorCode  ierr;
1720 
1721   PetscFunctionBegin;
1722   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
1723   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
1724   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
1725   if (!row_identity || !col_identity) {
1726     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
1727   }
1728 
1729   process_group_type pg;
1730   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1731   lgraph_type*   lgraph_p = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
1732   lgraph_type&   level_graph = *lgraph_p;
1733   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1734 
1735   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
1736   ilu_permuted(level_graph);
1737 
1738   /* put together the new matrix */
1739   ierr = MatCreate(A->comm, fact);CHKERRQ(ierr);
1740   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
1741   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
1742   ierr = MatSetSizes(*fact, m, n, M, N);CHKERRQ(ierr);
1743   ierr = MatSetType(*fact, A->type_name);CHKERRQ(ierr);
1744   ierr = MatAssemblyBegin(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1745   ierr = MatAssemblyEnd(*fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1746   (*fact)->factor = FACTOR_LU;
1747 
1748   ierr = PetscContainerCreate(A->comm, &c);
1749   ierr = PetscContainerSetPointer(c, lgraph_p);
1750   ierr = PetscObjectCompose((PetscObject) (*fact), "graph", (PetscObject) c);
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
1756 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat A, MatFactorInfo *info, Mat *B)
1757 {
1758   PetscFunctionBegin;
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 #undef __FUNCT__
1763 #define __FUNCT__ "MatSolve_MPIAIJ"
1764 /*
1765   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
1766 */
1767 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
1768 {
1769   namespace graph_dist = boost::graph::distributed;
1770 
1771   typedef graph_dist::ilu_default::ilu_level_graph_type  lgraph_type;
1772   lgraph_type*   lgraph_p;
1773   PetscContainer c;
1774   PetscErrorCode ierr;
1775 
1776   PetscFunctionBegin;
1777   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject *) &c);CHKERRQ(ierr);
1778   ierr = PetscContainerGetPointer(c, (void **) &lgraph_p);CHKERRQ(ierr);
1779   ierr = VecCopy(b, x); CHKERRQ(ierr);
1780 
1781   PetscScalar* array_x;
1782   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
1783   PetscInt sx;
1784   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
1785 
1786   PetscScalar* array_b;
1787   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
1788   PetscInt sb;
1789   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
1790 
1791   lgraph_type&   level_graph = *lgraph_p;
1792   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
1793 
1794   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
1795   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]),
1796                                                  ref_x(array_x, boost::extents[num_vertices(graph)]);
1797 
1798   typedef boost::iterator_property_map<array_ref_type::iterator,
1799                                 boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
1800   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph)),
1801                                                  vector_x(ref_x.begin(), get(boost::vertex_index, graph));
1802 
1803   ilu_set_solve(*lgraph_p, vector_b, vector_x);
1804 
1805   PetscFunctionReturn(0);
1806 }
1807 #endif
1808 
1809 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
1810   PetscInt       nzlocal,nsends,nrecvs;
1811   PetscInt       *send_rank,*sbuf_nz,*sbuf_j,**rbuf_j;
1812   PetscScalar    *sbuf_a,**rbuf_a;
1813   PetscErrorCode (*MatDestroy)(Mat);
1814 } Mat_Redundant;
1815 
1816 #undef __FUNCT__
1817 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
1818 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
1819 {
1820   PetscErrorCode       ierr;
1821   Mat_Redundant        *redund=(Mat_Redundant*)ptr;
1822   PetscInt             i;
1823 
1824   PetscFunctionBegin;
1825   ierr = PetscFree(redund->send_rank);CHKERRQ(ierr);
1826   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1827   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1828   for (i=0; i<redund->nrecvs; i++){
1829     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1830     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1831   }
1832   ierr = PetscFree3(redund->sbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1833   ierr = PetscFree(redund);CHKERRQ(ierr);
1834   PetscFunctionReturn(0);
1835 }
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatDestroy_MatRedundant"
1839 PetscErrorCode MatDestroy_MatRedundant(Mat A)
1840 {
1841   PetscErrorCode  ierr;
1842   PetscContainer  container;
1843   Mat_Redundant   *redund=PETSC_NULL;
1844 
1845   PetscFunctionBegin;
1846   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1847   if (container) {
1848     ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1849   } else {
1850     SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1851   }
1852   A->ops->destroy = redund->MatDestroy;
1853   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
1854   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
1855   ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
1861 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
1862 {
1863   PetscMPIInt    rank,size;
1864   MPI_Comm       comm=mat->comm;
1865   PetscErrorCode ierr;
1866   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0;
1867   PetscMPIInt    *send_rank=PETSC_NULL,*recv_rank=PETSC_NULL;
1868   PetscInt       *rowrange=mat->rmap.range;
1869   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1870   Mat            A=aij->A,B=aij->B,C=*matredundant;
1871   Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
1872   PetscScalar    *sbuf_a;
1873   PetscInt       nzlocal=a->nz+b->nz;
1874   PetscInt       j,cstart=mat->cmap.rstart,cend=mat->cmap.rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
1875   PetscInt       rstart=mat->rmap.rstart,rend=mat->rmap.rend,*bmap=aij->garray,M,N;
1876   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
1877   PetscScalar    *vals,*aworkA,*aworkB;
1878   PetscMPIInt    tag1,tag2,tag3,imdex;
1879   MPI_Request    *s_waits1=PETSC_NULL,*s_waits2=PETSC_NULL,*s_waits3=PETSC_NULL,
1880                  *r_waits1=PETSC_NULL,*r_waits2=PETSC_NULL,*r_waits3=PETSC_NULL;
1881   MPI_Status     recv_status,*send_status;
1882   PetscInt       *sbuf_nz=PETSC_NULL,*rbuf_nz=PETSC_NULL,count;
1883   PetscInt       **rbuf_j=PETSC_NULL;
1884   PetscScalar    **rbuf_a=PETSC_NULL;
1885   Mat_Redundant  *redund=PETSC_NULL;
1886   PetscContainer container;
1887 
1888   PetscFunctionBegin;
1889   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1890   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1891 
1892   if (reuse == MAT_REUSE_MATRIX) {
1893     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
1894     if (M != N || M != mat->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
1895     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
1896     if (M != N || M != mlocal_sub) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
1897     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject *)&container);CHKERRQ(ierr);
1898     if (container) {
1899       ierr = PetscContainerGetPointer(container,(void **)&redund);CHKERRQ(ierr);
1900     } else {
1901       SETERRQ(PETSC_ERR_PLIB,"Container does not exit");
1902     }
1903     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
1904 
1905     nsends    = redund->nsends;
1906     nrecvs    = redund->nrecvs;
1907     send_rank = redund->send_rank; recv_rank = send_rank + size;
1908     sbuf_nz   = redund->sbuf_nz;     rbuf_nz = sbuf_nz + nsends;
1909     sbuf_j    = redund->sbuf_j;
1910     sbuf_a    = redund->sbuf_a;
1911     rbuf_j    = redund->rbuf_j;
1912     rbuf_a    = redund->rbuf_a;
1913   }
1914 
1915   if (reuse == MAT_INITIAL_MATRIX){
1916     PetscMPIInt  subrank,subsize;
1917     PetscInt     nleftover,np_subcomm;
1918     /* get the destination processors' id send_rank, nsends and nrecvs */
1919     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
1920     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
1921     ierr = PetscMalloc((2*size+1)*sizeof(PetscMPIInt),&send_rank);
1922     recv_rank = send_rank + size;
1923     np_subcomm = size/nsubcomm;
1924     nleftover  = size - nsubcomm*np_subcomm;
1925     nsends = 0; nrecvs = 0;
1926     for (i=0; i<size; i++){ /* i=rank*/
1927       if (subrank == i/nsubcomm && rank != i){ /* my_subrank == other's subrank */
1928         send_rank[nsends] = i; nsends++;
1929         recv_rank[nrecvs++] = i;
1930       }
1931     }
1932     if (rank >= size - nleftover){/* this proc is a leftover processor */
1933       i = size-nleftover-1;
1934       j = 0;
1935       while (j < nsubcomm - nleftover){
1936         send_rank[nsends++] = i;
1937         i--; j++;
1938       }
1939     }
1940 
1941     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1){ /* this proc recvs from leftover processors */
1942       for (i=0; i<nleftover; i++){
1943         recv_rank[nrecvs++] = size-nleftover+i;
1944       }
1945     }
1946 
1947     /* allocate sbuf_j, sbuf_a */
1948     i = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
1949     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
1950     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
1951   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
1952 
1953   /* copy mat's local entries into the buffers */
1954   if (reuse == MAT_INITIAL_MATRIX){
1955     rownz_max = 0;
1956     rptr = sbuf_j;
1957     cols = sbuf_j + rend-rstart + 1;
1958     vals = sbuf_a;
1959     rptr[0] = 0;
1960     for (i=0; i<rend-rstart; i++){
1961       row = i + rstart;
1962       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1963       ncols  = nzA + nzB;
1964       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1965       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
1966       /* load the column indices for this row into cols */
1967       lwrite = 0;
1968       for (l=0; l<nzB; l++) {
1969         if ((ctmp = bmap[cworkB[l]]) < cstart){
1970           vals[lwrite]   = aworkB[l];
1971           cols[lwrite++] = ctmp;
1972         }
1973       }
1974       for (l=0; l<nzA; l++){
1975         vals[lwrite]   = aworkA[l];
1976         cols[lwrite++] = cstart + cworkA[l];
1977       }
1978       for (l=0; l<nzB; l++) {
1979         if ((ctmp = bmap[cworkB[l]]) >= cend){
1980           vals[lwrite]   = aworkB[l];
1981           cols[lwrite++] = ctmp;
1982         }
1983       }
1984       vals += ncols;
1985       cols += ncols;
1986       rptr[i+1] = rptr[i] + ncols;
1987       if (rownz_max < ncols) rownz_max = ncols;
1988     }
1989     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(1, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
1990   } else { /* only copy matrix values into sbuf_a */
1991     rptr = sbuf_j;
1992     vals = sbuf_a;
1993     rptr[0] = 0;
1994     for (i=0; i<rend-rstart; i++){
1995       row = i + rstart;
1996       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
1997       ncols  = nzA + nzB;
1998       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
1999       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2000       lwrite = 0;
2001       for (l=0; l<nzB; l++) {
2002         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2003       }
2004       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2005       for (l=0; l<nzB; l++) {
2006         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2007       }
2008       vals += ncols;
2009       rptr[i+1] = rptr[i] + ncols;
2010     }
2011   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2012 
2013   /* send nzlocal to others, and recv other's nzlocal */
2014   /*--------------------------------------------------*/
2015   if (reuse == MAT_INITIAL_MATRIX){
2016     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2017     s_waits2 = s_waits3 + nsends;
2018     s_waits1 = s_waits2 + nsends;
2019     r_waits1 = s_waits1 + nsends;
2020     r_waits2 = r_waits1 + nrecvs;
2021     r_waits3 = r_waits2 + nrecvs;
2022   } else {
2023     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2024     r_waits3 = s_waits3 + nsends;
2025   }
2026 
2027   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2028   if (reuse == MAT_INITIAL_MATRIX){
2029     /* get new tags to keep the communication clean */
2030     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2031     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2032     ierr = PetscMalloc3(nsends+nrecvs+1,PetscInt,&sbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2033     rbuf_nz = sbuf_nz + nsends;
2034 
2035     /* post receives of other's nzlocal */
2036     for (i=0; i<nrecvs; i++){
2037       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2038     }
2039     /* send nzlocal to others */
2040     for (i=0; i<nsends; i++){
2041       sbuf_nz[i] = nzlocal;
2042       ierr = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2043     }
2044     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2045     count = nrecvs;
2046     while (count) {
2047       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2048       recv_rank[imdex] = recv_status.MPI_SOURCE;
2049       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2050       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2051 
2052       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2053       rbuf_nz[imdex] += i + 2;
2054       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2055       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2056       count--;
2057     }
2058     /* wait on sends of nzlocal */
2059     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2060     /* send mat->i,j to others, and recv from other's */
2061     /*------------------------------------------------*/
2062     for (i=0; i<nsends; i++){
2063       j = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2064       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2065     }
2066     /* wait on receives of mat->i,j */
2067     /*------------------------------*/
2068     count = nrecvs;
2069     while (count) {
2070       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2071       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2072       count--;
2073     }
2074     /* wait on sends of mat->i,j */
2075     /*---------------------------*/
2076     if (nsends) {
2077       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2078     }
2079   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2080 
2081   /* post receives, send and receive mat->a */
2082   /*----------------------------------------*/
2083   for (imdex=0; imdex<nrecvs; imdex++) {
2084     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2085   }
2086   for (i=0; i<nsends; i++){
2087     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2088   }
2089   count = nrecvs;
2090   while (count) {
2091     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2092     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2093     count--;
2094   }
2095   if (nsends) {
2096     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2097   }
2098 
2099   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2100 
2101   /* create redundant matrix */
2102   /*-------------------------*/
2103   if (reuse == MAT_INITIAL_MATRIX){
2104     /* compute rownz_max for preallocation */
2105     for (imdex=0; imdex<nrecvs; imdex++){
2106       j = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2107       rptr = rbuf_j[imdex];
2108       for (i=0; i<j; i++){
2109         ncols = rptr[i+1] - rptr[i];
2110         if (rownz_max < ncols) rownz_max = ncols;
2111       }
2112     }
2113 
2114     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2115     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2116     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2117     ierr = MatSeqAIJSetPreallocation(C,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2118     ierr = MatMPIAIJSetPreallocation(C,rownz_max,PETSC_NULL,rownz_max,PETSC_NULL);CHKERRQ(ierr);
2119   } else {
2120     C = *matredundant;
2121   }
2122 
2123   /* insert local matrix entries */
2124   rptr = sbuf_j;
2125   cols = sbuf_j + rend-rstart + 1;
2126   vals = sbuf_a;
2127   for (i=0; i<rend-rstart; i++){
2128     row   = i + rstart;
2129     ncols = rptr[i+1] - rptr[i];
2130     ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2131     vals += ncols;
2132     cols += ncols;
2133   }
2134   /* insert received matrix entries */
2135   for (imdex=0; imdex<nrecvs; imdex++){
2136     rstart = rowrange[recv_rank[imdex]];
2137     rend   = rowrange[recv_rank[imdex]+1];
2138     rptr = rbuf_j[imdex];
2139     cols = rbuf_j[imdex] + rend-rstart + 1;
2140     vals = rbuf_a[imdex];
2141     for (i=0; i<rend-rstart; i++){
2142       row   = i + rstart;
2143       ncols = rptr[i+1] - rptr[i];
2144       ierr = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2145       vals += ncols;
2146       cols += ncols;
2147     }
2148   }
2149   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2150   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2151   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2152   if (M != mat->rmap.N || N != mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap.N);
2153   if (reuse == MAT_INITIAL_MATRIX){
2154     PetscContainer container;
2155     *matredundant = C;
2156     /* create a supporting struct and attach it to C for reuse */
2157     ierr = PetscNew(Mat_Redundant,&redund);CHKERRQ(ierr);
2158     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2159     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2160     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2161     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2162 
2163     redund->nzlocal = nzlocal;
2164     redund->nsends  = nsends;
2165     redund->nrecvs  = nrecvs;
2166     redund->send_rank = send_rank;
2167     redund->sbuf_nz = sbuf_nz;
2168     redund->sbuf_j  = sbuf_j;
2169     redund->sbuf_a  = sbuf_a;
2170     redund->rbuf_j  = rbuf_j;
2171     redund->rbuf_a  = rbuf_a;
2172 
2173     redund->MatDestroy = C->ops->destroy;
2174     C->ops->destroy    = MatDestroy_MatRedundant;
2175   }
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 /* -------------------------------------------------------------------*/
2180 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2181        MatGetRow_MPIAIJ,
2182        MatRestoreRow_MPIAIJ,
2183        MatMult_MPIAIJ,
2184 /* 4*/ MatMultAdd_MPIAIJ,
2185        MatMultTranspose_MPIAIJ,
2186        MatMultTransposeAdd_MPIAIJ,
2187 #ifdef PETSC_HAVE_PBGL
2188        MatSolve_MPIAIJ,
2189 #else
2190        0,
2191 #endif
2192        0,
2193        0,
2194 /*10*/ 0,
2195        0,
2196        0,
2197        MatRelax_MPIAIJ,
2198        MatTranspose_MPIAIJ,
2199 /*15*/ MatGetInfo_MPIAIJ,
2200        MatEqual_MPIAIJ,
2201        MatGetDiagonal_MPIAIJ,
2202        MatDiagonalScale_MPIAIJ,
2203        MatNorm_MPIAIJ,
2204 /*20*/ MatAssemblyBegin_MPIAIJ,
2205        MatAssemblyEnd_MPIAIJ,
2206        0,
2207        MatSetOption_MPIAIJ,
2208        MatZeroEntries_MPIAIJ,
2209 /*25*/ MatZeroRows_MPIAIJ,
2210        0,
2211 #ifdef PETSC_HAVE_PBGL
2212        MatLUFactorNumeric_MPIAIJ,
2213 #else
2214        0,
2215 #endif
2216        0,
2217        0,
2218 /*30*/ MatSetUpPreallocation_MPIAIJ,
2219 #ifdef PETSC_HAVE_PBGL
2220        MatILUFactorSymbolic_MPIAIJ,
2221 #else
2222        0,
2223 #endif
2224        0,
2225        0,
2226        0,
2227 /*35*/ MatDuplicate_MPIAIJ,
2228        0,
2229        0,
2230        0,
2231        0,
2232 /*40*/ MatAXPY_MPIAIJ,
2233        MatGetSubMatrices_MPIAIJ,
2234        MatIncreaseOverlap_MPIAIJ,
2235        MatGetValues_MPIAIJ,
2236        MatCopy_MPIAIJ,
2237 /*45*/ 0,
2238        MatScale_MPIAIJ,
2239        0,
2240        0,
2241        0,
2242 /*50*/ MatSetBlockSize_MPIAIJ,
2243        0,
2244        0,
2245        0,
2246        0,
2247 /*55*/ MatFDColoringCreate_MPIAIJ,
2248        0,
2249        MatSetUnfactored_MPIAIJ,
2250        MatPermute_MPIAIJ,
2251        0,
2252 /*60*/ MatGetSubMatrix_MPIAIJ,
2253        MatDestroy_MPIAIJ,
2254        MatView_MPIAIJ,
2255        0,
2256        0,
2257 /*65*/ 0,
2258        0,
2259        0,
2260        0,
2261        0,
2262 /*70*/ 0,
2263        0,
2264        MatSetColoring_MPIAIJ,
2265 #if defined(PETSC_HAVE_ADIC)
2266        MatSetValuesAdic_MPIAIJ,
2267 #else
2268        0,
2269 #endif
2270        MatSetValuesAdifor_MPIAIJ,
2271 /*75*/ 0,
2272        0,
2273        0,
2274        0,
2275        0,
2276 /*80*/ 0,
2277        0,
2278        0,
2279        0,
2280 /*84*/ MatLoad_MPIAIJ,
2281        0,
2282        0,
2283        0,
2284        0,
2285        0,
2286 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
2287        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
2288        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2289        MatPtAP_Basic,
2290        MatPtAPSymbolic_MPIAIJ,
2291 /*95*/ MatPtAPNumeric_MPIAIJ,
2292        0,
2293        0,
2294        0,
2295        0,
2296 /*100*/0,
2297        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
2298        MatPtAPNumeric_MPIAIJ_MPIAIJ,
2299        MatConjugate_MPIAIJ,
2300        0,
2301 /*105*/MatSetValuesRow_MPIAIJ,
2302        MatRealPart_MPIAIJ,
2303        MatImaginaryPart_MPIAIJ,
2304        0,
2305        0,
2306 /*110*/0,
2307        MatGetRedundantMatrix_MPIAIJ};
2308 
2309 /* ----------------------------------------------------------------------------------------*/
2310 
2311 EXTERN_C_BEGIN
2312 #undef __FUNCT__
2313 #define __FUNCT__ "MatStoreValues_MPIAIJ"
2314 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
2315 {
2316   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2317   PetscErrorCode ierr;
2318 
2319   PetscFunctionBegin;
2320   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2321   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 EXTERN_C_END
2325 
2326 EXTERN_C_BEGIN
2327 #undef __FUNCT__
2328 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
2329 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
2330 {
2331   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
2332   PetscErrorCode ierr;
2333 
2334   PetscFunctionBegin;
2335   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2336   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 EXTERN_C_END
2340 
2341 #include "petscpc.h"
2342 EXTERN_C_BEGIN
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
2345 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2346 {
2347   Mat_MPIAIJ     *b;
2348   PetscErrorCode ierr;
2349   PetscInt       i;
2350 
2351   PetscFunctionBegin;
2352   B->preallocated = PETSC_TRUE;
2353   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2354   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2355   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2356   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2357 
2358   B->rmap.bs = B->cmap.bs = 1;
2359   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2360   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2361   if (d_nnz) {
2362     for (i=0; i<B->rmap.n; i++) {
2363       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2364     }
2365   }
2366   if (o_nnz) {
2367     for (i=0; i<B->rmap.n; i++) {
2368       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2369     }
2370   }
2371   b = (Mat_MPIAIJ*)B->data;
2372 
2373   /* Explicitly create 2 MATSEQAIJ matrices. */
2374   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2375   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2376   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2377   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2378   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2379   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2380   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2381   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2382 
2383   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2384   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2385 
2386   PetscFunctionReturn(0);
2387 }
2388 EXTERN_C_END
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatDuplicate_MPIAIJ"
2392 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2393 {
2394   Mat            mat;
2395   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2396   PetscErrorCode ierr;
2397 
2398   PetscFunctionBegin;
2399   *newmat       = 0;
2400   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2401   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2402   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2403   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2404   a    = (Mat_MPIAIJ*)mat->data;
2405 
2406   mat->factor       = matin->factor;
2407   mat->rmap.bs      = matin->rmap.bs;
2408   mat->assembled    = PETSC_TRUE;
2409   mat->insertmode   = NOT_SET_VALUES;
2410   mat->preallocated = PETSC_TRUE;
2411 
2412   a->size           = oldmat->size;
2413   a->rank           = oldmat->rank;
2414   a->donotstash     = oldmat->donotstash;
2415   a->roworiented    = oldmat->roworiented;
2416   a->rowindices     = 0;
2417   a->rowvalues      = 0;
2418   a->getrowactive   = PETSC_FALSE;
2419 
2420   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2421   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2422 
2423   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2424   if (oldmat->colmap) {
2425 #if defined (PETSC_USE_CTABLE)
2426     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2427 #else
2428     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2429     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2430     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
2431 #endif
2432   } else a->colmap = 0;
2433   if (oldmat->garray) {
2434     PetscInt len;
2435     len  = oldmat->B->cmap.n;
2436     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2437     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2438     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
2439   } else a->garray = 0;
2440 
2441   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2442   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2443   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2444   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2445   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2446   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2447   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2448   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2449   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2450   *newmat = mat;
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 #include "petscsys.h"
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLoad_MPIAIJ"
2458 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2459 {
2460   Mat            A;
2461   PetscScalar    *vals,*svals;
2462   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2463   MPI_Status     status;
2464   PetscErrorCode ierr;
2465   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
2466   PetscInt       i,nz,j,rstart,rend,mmax;
2467   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2468   PetscInt       *ourlens = PETSC_NULL,*procsnz = PETSC_NULL,*offlens = PETSC_NULL,jj,*mycols,*smycols;
2469   PetscInt       cend,cstart,n,*rowners;
2470   int            fd;
2471 
2472   PetscFunctionBegin;
2473   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2474   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2475   if (!rank) {
2476     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2477     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2478     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2479   }
2480 
2481   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2482   M = header[1]; N = header[2];
2483   /* determine ownership of all rows */
2484   m    = M/size + ((M % size) > rank);
2485   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
2486   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2487 
2488   /* First process needs enough room for process with most rows */
2489   if (!rank) {
2490     mmax       = rowners[1];
2491     for (i=2; i<size; i++) {
2492       mmax = PetscMax(mmax,rowners[i]);
2493     }
2494   } else mmax = m;
2495 
2496   rowners[0] = 0;
2497   for (i=2; i<=size; i++) {
2498     rowners[i] += rowners[i-1];
2499   }
2500   rstart = rowners[rank];
2501   rend   = rowners[rank+1];
2502 
2503   /* distribute row lengths to all processors */
2504   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2505   if (!rank) {
2506     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2507     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2508     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2509     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2510     for (j=0; j<m; j++) {
2511       procsnz[0] += ourlens[j];
2512     }
2513     for (i=1; i<size; i++) {
2514       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2515       /* calculate the number of nonzeros on each processor */
2516       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2517         procsnz[i] += rowlengths[j];
2518       }
2519       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2520     }
2521     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2522   } else {
2523     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2524   }
2525 
2526   if (!rank) {
2527     /* determine max buffer needed and allocate it */
2528     maxnz = 0;
2529     for (i=0; i<size; i++) {
2530       maxnz = PetscMax(maxnz,procsnz[i]);
2531     }
2532     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2533 
2534     /* read in my part of the matrix column indices  */
2535     nz   = procsnz[0];
2536     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2537     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2538 
2539     /* read in every one elses and ship off */
2540     for (i=1; i<size; i++) {
2541       nz   = procsnz[i];
2542       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2543       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2544     }
2545     ierr = PetscFree(cols);CHKERRQ(ierr);
2546   } else {
2547     /* determine buffer space needed for message */
2548     nz = 0;
2549     for (i=0; i<m; i++) {
2550       nz += ourlens[i];
2551     }
2552     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2553 
2554     /* receive message of column indices*/
2555     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2556     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2557     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2558   }
2559 
2560   /* determine column ownership if matrix is not square */
2561   if (N != M) {
2562     n      = N/size + ((N % size) > rank);
2563     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2564     cstart = cend - n;
2565   } else {
2566     cstart = rstart;
2567     cend   = rend;
2568     n      = cend - cstart;
2569   }
2570 
2571   /* loop over local rows, determining number of off diagonal entries */
2572   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2573   jj = 0;
2574   for (i=0; i<m; i++) {
2575     for (j=0; j<ourlens[i]; j++) {
2576       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2577       jj++;
2578     }
2579   }
2580 
2581   /* create our matrix */
2582   for (i=0; i<m; i++) {
2583     ourlens[i] -= offlens[i];
2584   }
2585   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2586   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2587   ierr = MatSetType(A,type);CHKERRQ(ierr);
2588   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2589 
2590   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2591   for (i=0; i<m; i++) {
2592     ourlens[i] += offlens[i];
2593   }
2594 
2595   if (!rank) {
2596     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2597 
2598     /* read in my part of the matrix numerical values  */
2599     nz   = procsnz[0];
2600     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2601 
2602     /* insert into matrix */
2603     jj      = rstart;
2604     smycols = mycols;
2605     svals   = vals;
2606     for (i=0; i<m; i++) {
2607       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2608       smycols += ourlens[i];
2609       svals   += ourlens[i];
2610       jj++;
2611     }
2612 
2613     /* read in other processors and ship out */
2614     for (i=1; i<size; i++) {
2615       nz   = procsnz[i];
2616       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2617       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2618     }
2619     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2620   } else {
2621     /* receive numeric values */
2622     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2623 
2624     /* receive message of values*/
2625     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2626     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2627     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2628 
2629     /* insert into matrix */
2630     jj      = rstart;
2631     smycols = mycols;
2632     svals   = vals;
2633     for (i=0; i<m; i++) {
2634       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2635       smycols += ourlens[i];
2636       svals   += ourlens[i];
2637       jj++;
2638     }
2639   }
2640   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2641   ierr = PetscFree(vals);CHKERRQ(ierr);
2642   ierr = PetscFree(mycols);CHKERRQ(ierr);
2643   ierr = PetscFree(rowners);CHKERRQ(ierr);
2644 
2645   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2646   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2647   *newmat = A;
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 #undef __FUNCT__
2652 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2653 /*
2654     Not great since it makes two copies of the submatrix, first an SeqAIJ
2655   in local and then by concatenating the local matrices the end result.
2656   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2657 */
2658 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2659 {
2660   PetscErrorCode ierr;
2661   PetscMPIInt    rank,size;
2662   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2663   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2664   Mat            *local,M,Mreuse;
2665   PetscScalar    *vwork,*aa;
2666   MPI_Comm       comm = mat->comm;
2667   Mat_SeqAIJ     *aij;
2668 
2669 
2670   PetscFunctionBegin;
2671   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2672   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2673 
2674   if (call ==  MAT_REUSE_MATRIX) {
2675     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2676     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2677     local = &Mreuse;
2678     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2679   } else {
2680     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2681     Mreuse = *local;
2682     ierr   = PetscFree(local);CHKERRQ(ierr);
2683   }
2684 
2685   /*
2686       m - number of local rows
2687       n - number of columns (same on all processors)
2688       rstart - first row in new global matrix generated
2689   */
2690   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2691   if (call == MAT_INITIAL_MATRIX) {
2692     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2693     ii  = aij->i;
2694     jj  = aij->j;
2695 
2696     /*
2697         Determine the number of non-zeros in the diagonal and off-diagonal
2698         portions of the matrix in order to do correct preallocation
2699     */
2700 
2701     /* first get start and end of "diagonal" columns */
2702     if (csize == PETSC_DECIDE) {
2703       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2704       if (mglobal == n) { /* square matrix */
2705 	nlocal = m;
2706       } else {
2707         nlocal = n/size + ((n % size) > rank);
2708       }
2709     } else {
2710       nlocal = csize;
2711     }
2712     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2713     rstart = rend - nlocal;
2714     if (rank == size - 1 && rend != n) {
2715       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2716     }
2717 
2718     /* next, compute all the lengths */
2719     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2720     olens = dlens + m;
2721     for (i=0; i<m; i++) {
2722       jend = ii[i+1] - ii[i];
2723       olen = 0;
2724       dlen = 0;
2725       for (j=0; j<jend; j++) {
2726         if (*jj < rstart || *jj >= rend) olen++;
2727         else dlen++;
2728         jj++;
2729       }
2730       olens[i] = olen;
2731       dlens[i] = dlen;
2732     }
2733     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2734     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2735     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2736     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2737     ierr = PetscFree(dlens);CHKERRQ(ierr);
2738   } else {
2739     PetscInt ml,nl;
2740 
2741     M = *newmat;
2742     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2743     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2744     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2745     /*
2746          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2747        rather than the slower MatSetValues().
2748     */
2749     M->was_assembled = PETSC_TRUE;
2750     M->assembled     = PETSC_FALSE;
2751   }
2752   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2753   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2754   ii  = aij->i;
2755   jj  = aij->j;
2756   aa  = aij->a;
2757   for (i=0; i<m; i++) {
2758     row   = rstart + i;
2759     nz    = ii[i+1] - ii[i];
2760     cwork = jj;     jj += nz;
2761     vwork = aa;     aa += nz;
2762     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2763   }
2764 
2765   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2766   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2767   *newmat = M;
2768 
2769   /* save submatrix used in processor for next request */
2770   if (call ==  MAT_INITIAL_MATRIX) {
2771     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2772     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2773   }
2774 
2775   PetscFunctionReturn(0);
2776 }
2777 
2778 EXTERN_C_BEGIN
2779 #undef __FUNCT__
2780 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2781 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2782 {
2783   PetscInt       m,cstart, cend,j,nnz,i,d;
2784   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2785   const PetscInt *JJ;
2786   PetscScalar    *values;
2787   PetscErrorCode ierr;
2788 
2789   PetscFunctionBegin;
2790   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2791 
2792   B->rmap.bs = B->cmap.bs = 1;
2793   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2794   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2795   m      = B->rmap.n;
2796   cstart = B->cmap.rstart;
2797   cend   = B->cmap.rend;
2798   rstart = B->rmap.rstart;
2799 
2800   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2801   o_nnz = d_nnz + m;
2802 
2803   for (i=0; i<m; i++) {
2804     nnz     = Ii[i+1]- Ii[i];
2805     JJ      = J + Ii[i];
2806     nnz_max = PetscMax(nnz_max,nnz);
2807     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2808     for (j=0; j<nnz; j++) {
2809       if (*JJ >= cstart) break;
2810       JJ++;
2811     }
2812     d = 0;
2813     for (; j<nnz; j++) {
2814       if (*JJ++ >= cend) break;
2815       d++;
2816     }
2817     d_nnz[i] = d;
2818     o_nnz[i] = nnz - d;
2819   }
2820   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2821   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2822 
2823   if (v) values = (PetscScalar*)v;
2824   else {
2825     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2826     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2827   }
2828 
2829   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2830   for (i=0; i<m; i++) {
2831     ii   = i + rstart;
2832     nnz  = Ii[i+1]- Ii[i];
2833     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2834   }
2835   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2836   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2837   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2838 
2839   if (!v) {
2840     ierr = PetscFree(values);CHKERRQ(ierr);
2841   }
2842   PetscFunctionReturn(0);
2843 }
2844 EXTERN_C_END
2845 
2846 #undef __FUNCT__
2847 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2848 /*@
2849    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2850    (the default parallel PETSc format).
2851 
2852    Collective on MPI_Comm
2853 
2854    Input Parameters:
2855 +  B - the matrix
2856 .  i - the indices into j for the start of each local row (starts with zero)
2857 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2858 -  v - optional values in the matrix
2859 
2860    Level: developer
2861 
2862    Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal
2863      storage format. Thus changing the values in a[] after this call will not effect the matrix values.
2864 
2865 .keywords: matrix, aij, compressed row, sparse, parallel
2866 
2867 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2868           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
2869 @*/
2870 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2871 {
2872   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2873 
2874   PetscFunctionBegin;
2875   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2876   if (f) {
2877     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2878   }
2879   PetscFunctionReturn(0);
2880 }
2881 
2882 #undef __FUNCT__
2883 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2884 /*@C
2885    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2886    (the default parallel PETSc format).  For good matrix assembly performance
2887    the user should preallocate the matrix storage by setting the parameters
2888    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2889    performance can be increased by more than a factor of 50.
2890 
2891    Collective on MPI_Comm
2892 
2893    Input Parameters:
2894 +  A - the matrix
2895 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2896            (same value is used for all local rows)
2897 .  d_nnz - array containing the number of nonzeros in the various rows of the
2898            DIAGONAL portion of the local submatrix (possibly different for each row)
2899            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2900            The size of this array is equal to the number of local rows, i.e 'm'.
2901            You must leave room for the diagonal entry even if it is zero.
2902 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2903            submatrix (same value is used for all local rows).
2904 -  o_nnz - array containing the number of nonzeros in the various rows of the
2905            OFF-DIAGONAL portion of the local submatrix (possibly different for
2906            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2907            structure. The size of this array is equal to the number
2908            of local rows, i.e 'm'.
2909 
2910    If the *_nnz parameter is given then the *_nz parameter is ignored
2911 
2912    The AIJ format (also called the Yale sparse matrix format or
2913    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2914    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2915 
2916    The parallel matrix is partitioned such that the first m0 rows belong to
2917    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2918    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2919 
2920    The DIAGONAL portion of the local submatrix of a processor can be defined
2921    as the submatrix which is obtained by extraction the part corresponding
2922    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2923    first row that belongs to the processor, and r2 is the last row belonging
2924    to the this processor. This is a square mxm matrix. The remaining portion
2925    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2926 
2927    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2928 
2929    Example usage:
2930 
2931    Consider the following 8x8 matrix with 34 non-zero values, that is
2932    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2933    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2934    as follows:
2935 
2936 .vb
2937             1  2  0  |  0  3  0  |  0  4
2938     Proc0   0  5  6  |  7  0  0  |  8  0
2939             9  0 10  | 11  0  0  | 12  0
2940     -------------------------------------
2941            13  0 14  | 15 16 17  |  0  0
2942     Proc1   0 18  0  | 19 20 21  |  0  0
2943             0  0  0  | 22 23  0  | 24  0
2944     -------------------------------------
2945     Proc2  25 26 27  |  0  0 28  | 29  0
2946            30  0  0  | 31 32 33  |  0 34
2947 .ve
2948 
2949    This can be represented as a collection of submatrices as:
2950 
2951 .vb
2952       A B C
2953       D E F
2954       G H I
2955 .ve
2956 
2957    Where the submatrices A,B,C are owned by proc0, D,E,F are
2958    owned by proc1, G,H,I are owned by proc2.
2959 
2960    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2961    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2962    The 'M','N' parameters are 8,8, and have the same values on all procs.
2963 
2964    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2965    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2966    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2967    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2968    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2969    matrix, ans [DF] as another SeqAIJ matrix.
2970 
2971    When d_nz, o_nz parameters are specified, d_nz storage elements are
2972    allocated for every row of the local diagonal submatrix, and o_nz
2973    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2974    One way to choose d_nz and o_nz is to use the max nonzerors per local
2975    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2976    In this case, the values of d_nz,o_nz are:
2977 .vb
2978      proc0 : dnz = 2, o_nz = 2
2979      proc1 : dnz = 3, o_nz = 2
2980      proc2 : dnz = 1, o_nz = 4
2981 .ve
2982    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2983    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2984    for proc3. i.e we are using 12+15+10=37 storage locations to store
2985    34 values.
2986 
2987    When d_nnz, o_nnz parameters are specified, the storage is specified
2988    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2989    In the above case the values for d_nnz,o_nnz are:
2990 .vb
2991      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2992      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2993      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2994 .ve
2995    Here the space allocated is sum of all the above values i.e 34, and
2996    hence pre-allocation is perfect.
2997 
2998    Level: intermediate
2999 
3000 .keywords: matrix, aij, compressed row, sparse, parallel
3001 
3002 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
3003           MPIAIJ
3004 @*/
3005 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3006 {
3007   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3008 
3009   PetscFunctionBegin;
3010   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3011   if (f) {
3012     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3013   }
3014   PetscFunctionReturn(0);
3015 }
3016 
3017 #undef __FUNCT__
3018 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3019 /*@C
3020      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3021          CSR format the local rows.
3022 
3023    Collective on MPI_Comm
3024 
3025    Input Parameters:
3026 +  comm - MPI communicator
3027 .  m - number of local rows (Cannot be PETSC_DECIDE)
3028 .  n - This value should be the same as the local size used in creating the
3029        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3030        calculated if N is given) For square matrices n is almost always m.
3031 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3032 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3033 .   i - row indices
3034 .   j - column indices
3035 -   a - matrix values
3036 
3037    Output Parameter:
3038 .   mat - the matrix
3039 
3040    Level: intermediate
3041 
3042    Notes:
3043        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3044      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3045      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3046 
3047        The i and j indices are 0 based
3048 
3049 .keywords: matrix, aij, compressed row, sparse, parallel
3050 
3051 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3052           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
3053 @*/
3054 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3055 {
3056   PetscErrorCode ierr;
3057 
3058  PetscFunctionBegin;
3059   if (i[0]) {
3060     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3061   }
3062   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3063   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3064   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3065   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
3066   PetscFunctionReturn(0);
3067 }
3068 
3069 #undef __FUNCT__
3070 #define __FUNCT__ "MatCreateMPIAIJ"
3071 /*@C
3072    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
3073    (the default parallel PETSc format).  For good matrix assembly performance
3074    the user should preallocate the matrix storage by setting the parameters
3075    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3076    performance can be increased by more than a factor of 50.
3077 
3078    Collective on MPI_Comm
3079 
3080    Input Parameters:
3081 +  comm - MPI communicator
3082 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3083            This value should be the same as the local size used in creating the
3084            y vector for the matrix-vector product y = Ax.
3085 .  n - This value should be the same as the local size used in creating the
3086        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3087        calculated if N is given) For square matrices n is almost always m.
3088 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3089 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3090 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3091            (same value is used for all local rows)
3092 .  d_nnz - array containing the number of nonzeros in the various rows of the
3093            DIAGONAL portion of the local submatrix (possibly different for each row)
3094            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
3095            The size of this array is equal to the number of local rows, i.e 'm'.
3096            You must leave room for the diagonal entry even if it is zero.
3097 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3098            submatrix (same value is used for all local rows).
3099 -  o_nnz - array containing the number of nonzeros in the various rows of the
3100            OFF-DIAGONAL portion of the local submatrix (possibly different for
3101            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
3102            structure. The size of this array is equal to the number
3103            of local rows, i.e 'm'.
3104 
3105    Output Parameter:
3106 .  A - the matrix
3107 
3108    Notes:
3109    If the *_nnz parameter is given then the *_nz parameter is ignored
3110 
3111    m,n,M,N parameters specify the size of the matrix, and its partitioning across
3112    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
3113    storage requirements for this matrix.
3114 
3115    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
3116    processor than it must be used on all processors that share the object for
3117    that argument.
3118 
3119    The user MUST specify either the local or global matrix dimensions
3120    (possibly both).
3121 
3122    The parallel matrix is partitioned such that the first m0 rows belong to
3123    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3124    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3125 
3126    The DIAGONAL portion of the local submatrix of a processor can be defined
3127    as the submatrix which is obtained by extraction the part corresponding
3128    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
3129    first row that belongs to the processor, and r2 is the last row belonging
3130    to the this processor. This is a square mxm matrix. The remaining portion
3131    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
3132 
3133    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3134 
3135    When calling this routine with a single process communicator, a matrix of
3136    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
3137    type of communicator, use the construction mechanism:
3138      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
3139 
3140    By default, this format uses inodes (identical nodes) when possible.
3141    We search for consecutive rows with the same nonzero structure, thereby
3142    reusing matrix information to achieve increased efficiency.
3143 
3144    Options Database Keys:
3145 +  -mat_no_inode  - Do not use inodes
3146 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
3147 -  -mat_aij_oneindex - Internally use indexing starting at 1
3148         rather than 0.  Note that when calling MatSetValues(),
3149         the user still MUST index entries starting at 0!
3150 
3151 
3152    Example usage:
3153 
3154    Consider the following 8x8 matrix with 34 non-zero values, that is
3155    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3156    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3157    as follows:
3158 
3159 .vb
3160             1  2  0  |  0  3  0  |  0  4
3161     Proc0   0  5  6  |  7  0  0  |  8  0
3162             9  0 10  | 11  0  0  | 12  0
3163     -------------------------------------
3164            13  0 14  | 15 16 17  |  0  0
3165     Proc1   0 18  0  | 19 20 21  |  0  0
3166             0  0  0  | 22 23  0  | 24  0
3167     -------------------------------------
3168     Proc2  25 26 27  |  0  0 28  | 29  0
3169            30  0  0  | 31 32 33  |  0 34
3170 .ve
3171 
3172    This can be represented as a collection of submatrices as:
3173 
3174 .vb
3175       A B C
3176       D E F
3177       G H I
3178 .ve
3179 
3180    Where the submatrices A,B,C are owned by proc0, D,E,F are
3181    owned by proc1, G,H,I are owned by proc2.
3182 
3183    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3184    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3185    The 'M','N' parameters are 8,8, and have the same values on all procs.
3186 
3187    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3188    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3189    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3190    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3191    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3192    matrix, ans [DF] as another SeqAIJ matrix.
3193 
3194    When d_nz, o_nz parameters are specified, d_nz storage elements are
3195    allocated for every row of the local diagonal submatrix, and o_nz
3196    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3197    One way to choose d_nz and o_nz is to use the max nonzerors per local
3198    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3199    In this case, the values of d_nz,o_nz are:
3200 .vb
3201      proc0 : dnz = 2, o_nz = 2
3202      proc1 : dnz = 3, o_nz = 2
3203      proc2 : dnz = 1, o_nz = 4
3204 .ve
3205    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3206    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3207    for proc3. i.e we are using 12+15+10=37 storage locations to store
3208    34 values.
3209 
3210    When d_nnz, o_nnz parameters are specified, the storage is specified
3211    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3212    In the above case the values for d_nnz,o_nnz are:
3213 .vb
3214      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3215      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3216      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3217 .ve
3218    Here the space allocated is sum of all the above values i.e 34, and
3219    hence pre-allocation is perfect.
3220 
3221    Level: intermediate
3222 
3223 .keywords: matrix, aij, compressed row, sparse, parallel
3224 
3225 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3226           MPIAIJ, MatCreateMPIAIJWithArrays()
3227 @*/
3228 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3229 {
3230   PetscErrorCode ierr;
3231   PetscMPIInt    size;
3232 
3233   PetscFunctionBegin;
3234   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3235   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3236   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3237   if (size > 1) {
3238     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
3239     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3240   } else {
3241     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
3242     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
3243   }
3244   PetscFunctionReturn(0);
3245 }
3246 
3247 #undef __FUNCT__
3248 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
3249 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3250 {
3251   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
3252 
3253   PetscFunctionBegin;
3254   *Ad     = a->A;
3255   *Ao     = a->B;
3256   *colmap = a->garray;
3257   PetscFunctionReturn(0);
3258 }
3259 
3260 #undef __FUNCT__
3261 #define __FUNCT__ "MatSetColoring_MPIAIJ"
3262 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
3263 {
3264   PetscErrorCode ierr;
3265   PetscInt       i;
3266   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3267 
3268   PetscFunctionBegin;
3269   if (coloring->ctype == IS_COLORING_GLOBAL) {
3270     ISColoringValue *allcolors,*colors;
3271     ISColoring      ocoloring;
3272 
3273     /* set coloring for diagonal portion */
3274     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
3275 
3276     /* set coloring for off-diagonal portion */
3277     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
3278     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3279     for (i=0; i<a->B->cmap.n; i++) {
3280       colors[i] = allcolors[a->garray[i]];
3281     }
3282     ierr = PetscFree(allcolors);CHKERRQ(ierr);
3283     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3284     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3285     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3286   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
3287     ISColoringValue *colors;
3288     PetscInt        *larray;
3289     ISColoring      ocoloring;
3290 
3291     /* set coloring for diagonal portion */
3292     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3293     for (i=0; i<a->A->cmap.n; i++) {
3294       larray[i] = i + A->cmap.rstart;
3295     }
3296     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
3297     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3298     for (i=0; i<a->A->cmap.n; i++) {
3299       colors[i] = coloring->colors[larray[i]];
3300     }
3301     ierr = PetscFree(larray);CHKERRQ(ierr);
3302     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3303     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
3304     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3305 
3306     /* set coloring for off-diagonal portion */
3307     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
3308     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
3309     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
3310     for (i=0; i<a->B->cmap.n; i++) {
3311       colors[i] = coloring->colors[larray[i]];
3312     }
3313     ierr = PetscFree(larray);CHKERRQ(ierr);
3314     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
3315     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
3316     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
3317   } else {
3318     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
3319   }
3320 
3321   PetscFunctionReturn(0);
3322 }
3323 
3324 #if defined(PETSC_HAVE_ADIC)
3325 #undef __FUNCT__
3326 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
3327 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
3328 {
3329   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3330   PetscErrorCode ierr;
3331 
3332   PetscFunctionBegin;
3333   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
3334   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
3335   PetscFunctionReturn(0);
3336 }
3337 #endif
3338 
3339 #undef __FUNCT__
3340 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
3341 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
3342 {
3343   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3344   PetscErrorCode ierr;
3345 
3346   PetscFunctionBegin;
3347   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
3348   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
3349   PetscFunctionReturn(0);
3350 }
3351 
3352 #undef __FUNCT__
3353 #define __FUNCT__ "MatMerge"
3354 /*@C
3355       MatMerge - Creates a single large PETSc matrix by concatinating sequential
3356                  matrices from each processor
3357 
3358     Collective on MPI_Comm
3359 
3360    Input Parameters:
3361 +    comm - the communicators the parallel matrix will live on
3362 .    inmat - the input sequential matrices
3363 .    n - number of local columns (or PETSC_DECIDE)
3364 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3365 
3366    Output Parameter:
3367 .    outmat - the parallel matrix generated
3368 
3369     Level: advanced
3370 
3371    Notes: The number of columns of the matrix in EACH processor MUST be the same.
3372 
3373 @*/
3374 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3375 {
3376   PetscErrorCode ierr;
3377   PetscInt       m,N,i,rstart,nnz,Ii,*dnz,*onz;
3378   PetscInt       *indx;
3379   PetscScalar    *values;
3380 
3381   PetscFunctionBegin;
3382   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3383   if (scall == MAT_INITIAL_MATRIX){
3384     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
3385     if (n == PETSC_DECIDE){
3386       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
3387     }
3388     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3389     rstart -= m;
3390 
3391     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3392     for (i=0;i<m;i++) {
3393       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3394       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
3395       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
3396     }
3397     /* This routine will ONLY return MPIAIJ type matrix */
3398     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3399     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3400     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
3401     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
3402     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3403 
3404   } else if (scall == MAT_REUSE_MATRIX){
3405     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
3406   } else {
3407     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3408   }
3409 
3410   for (i=0;i<m;i++) {
3411     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3412     Ii    = i + rstart;
3413     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3414     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3415   }
3416   ierr = MatDestroy(inmat);CHKERRQ(ierr);
3417   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3418   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3419 
3420   PetscFunctionReturn(0);
3421 }
3422 
3423 #undef __FUNCT__
3424 #define __FUNCT__ "MatFileSplit"
3425 PetscErrorCode MatFileSplit(Mat A,char *outfile)
3426 {
3427   PetscErrorCode    ierr;
3428   PetscMPIInt       rank;
3429   PetscInt          m,N,i,rstart,nnz;
3430   size_t            len;
3431   const PetscInt    *indx;
3432   PetscViewer       out;
3433   char              *name;
3434   Mat               B;
3435   const PetscScalar *values;
3436 
3437   PetscFunctionBegin;
3438   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
3439   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
3440   /* Should this be the type of the diagonal block of A? */
3441   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
3442   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
3443   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
3444   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3445   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
3446   for (i=0;i<m;i++) {
3447     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3448     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3449     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
3450   }
3451   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3452   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3453 
3454   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
3455   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
3456   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
3457   sprintf(name,"%s.%d",outfile,rank);
3458   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
3459   ierr = PetscFree(name);
3460   ierr = MatView(B,out);CHKERRQ(ierr);
3461   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
3462   ierr = MatDestroy(B);CHKERRQ(ierr);
3463   PetscFunctionReturn(0);
3464 }
3465 
3466 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
3467 #undef __FUNCT__
3468 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
3469 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
3470 {
3471   PetscErrorCode       ierr;
3472   Mat_Merge_SeqsToMPI  *merge;
3473   PetscContainer       container;
3474 
3475   PetscFunctionBegin;
3476   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3477   if (container) {
3478     ierr = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3479     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
3480     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
3481     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
3482     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
3483     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
3484     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
3485     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
3486     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
3487     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
3488     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
3489     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
3490 
3491     ierr = PetscContainerDestroy(container);CHKERRQ(ierr);
3492     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3493   }
3494   ierr = PetscFree(merge);CHKERRQ(ierr);
3495 
3496   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3497   PetscFunctionReturn(0);
3498 }
3499 
3500 #include "src/mat/utils/freespace.h"
3501 #include "petscbt.h"
3502 static PetscEvent logkey_seqstompinum = 0;
3503 #undef __FUNCT__
3504 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3505 /*@C
3506       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3507                  matrices from each processor
3508 
3509     Collective on MPI_Comm
3510 
3511    Input Parameters:
3512 +    comm - the communicators the parallel matrix will live on
3513 .    seqmat - the input sequential matrices
3514 .    m - number of local rows (or PETSC_DECIDE)
3515 .    n - number of local columns (or PETSC_DECIDE)
3516 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3517 
3518    Output Parameter:
3519 .    mpimat - the parallel matrix generated
3520 
3521     Level: advanced
3522 
3523    Notes:
3524      The dimensions of the sequential matrix in each processor MUST be the same.
3525      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3526      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3527 @*/
3528 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3529 {
3530   PetscErrorCode       ierr;
3531   MPI_Comm             comm=mpimat->comm;
3532   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3533   PetscMPIInt          size,rank,taga,*len_s;
3534   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3535   PetscInt             proc,m;
3536   PetscInt             **buf_ri,**buf_rj;
3537   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3538   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3539   MPI_Request          *s_waits,*r_waits;
3540   MPI_Status           *status;
3541   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3542   Mat_Merge_SeqsToMPI  *merge;
3543   PetscContainer       container;
3544 
3545   PetscFunctionBegin;
3546   if (!logkey_seqstompinum) {
3547     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3548   }
3549   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3550 
3551   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3552   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3553 
3554   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3555   if (container) {
3556     ierr  = PetscContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3557   }
3558   bi     = merge->bi;
3559   bj     = merge->bj;
3560   buf_ri = merge->buf_ri;
3561   buf_rj = merge->buf_rj;
3562 
3563   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3564   owners = merge->rowmap.range;
3565   len_s  = merge->len_s;
3566 
3567   /* send and recv matrix values */
3568   /*-----------------------------*/
3569   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3570   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3571 
3572   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3573   for (proc=0,k=0; proc<size; proc++){
3574     if (!len_s[proc]) continue;
3575     i = owners[proc];
3576     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3577     k++;
3578   }
3579 
3580   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3581   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3582   ierr = PetscFree(status);CHKERRQ(ierr);
3583 
3584   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3585   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3586 
3587   /* insert mat values of mpimat */
3588   /*----------------------------*/
3589   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3590   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3591   nextrow = buf_ri_k + merge->nrecv;
3592   nextai  = nextrow + merge->nrecv;
3593 
3594   for (k=0; k<merge->nrecv; k++){
3595     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3596     nrows = *(buf_ri_k[k]);
3597     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3598     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3599   }
3600 
3601   /* set values of ba */
3602   m = merge->rowmap.n;
3603   for (i=0; i<m; i++) {
3604     arow = owners[rank] + i;
3605     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3606     bnzi = bi[i+1] - bi[i];
3607     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3608 
3609     /* add local non-zero vals of this proc's seqmat into ba */
3610     anzi = ai[arow+1] - ai[arow];
3611     aj   = a->j + ai[arow];
3612     aa   = a->a + ai[arow];
3613     nextaj = 0;
3614     for (j=0; nextaj<anzi; j++){
3615       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3616         ba_i[j] += aa[nextaj++];
3617       }
3618     }
3619 
3620     /* add received vals into ba */
3621     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3622       /* i-th row */
3623       if (i == *nextrow[k]) {
3624         anzi = *(nextai[k]+1) - *nextai[k];
3625         aj   = buf_rj[k] + *(nextai[k]);
3626         aa   = abuf_r[k] + *(nextai[k]);
3627         nextaj = 0;
3628         for (j=0; nextaj<anzi; j++){
3629           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3630             ba_i[j] += aa[nextaj++];
3631           }
3632         }
3633         nextrow[k]++; nextai[k]++;
3634       }
3635     }
3636     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3637   }
3638   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3639   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3640 
3641   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3642   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3643   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3644   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3645   PetscFunctionReturn(0);
3646 }
3647 
3648 static PetscEvent logkey_seqstompisym = 0;
3649 #undef __FUNCT__
3650 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3651 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3652 {
3653   PetscErrorCode       ierr;
3654   Mat                  B_mpi;
3655   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3656   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3657   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3658   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3659   PetscInt             len,proc,*dnz,*onz;
3660   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3661   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3662   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3663   MPI_Status           *status;
3664   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3665   PetscBT              lnkbt;
3666   Mat_Merge_SeqsToMPI  *merge;
3667   PetscContainer       container;
3668 
3669   PetscFunctionBegin;
3670   if (!logkey_seqstompisym) {
3671     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3672   }
3673   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3674 
3675   /* make sure it is a PETSc comm */
3676   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3677   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3678   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3679 
3680   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3681   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3682 
3683   /* determine row ownership */
3684   /*---------------------------------------------------------*/
3685   merge->rowmap.n = m;
3686   merge->rowmap.N = M;
3687   merge->rowmap.bs = 1;
3688   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3689   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3690   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3691 
3692   m      = merge->rowmap.n;
3693   M      = merge->rowmap.N;
3694   owners = merge->rowmap.range;
3695 
3696   /* determine the number of messages to send, their lengths */
3697   /*---------------------------------------------------------*/
3698   len_s  = merge->len_s;
3699 
3700   len = 0;  /* length of buf_si[] */
3701   merge->nsend = 0;
3702   for (proc=0; proc<size; proc++){
3703     len_si[proc] = 0;
3704     if (proc == rank){
3705       len_s[proc] = 0;
3706     } else {
3707       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3708       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3709     }
3710     if (len_s[proc]) {
3711       merge->nsend++;
3712       nrows = 0;
3713       for (i=owners[proc]; i<owners[proc+1]; i++){
3714         if (ai[i+1] > ai[i]) nrows++;
3715       }
3716       len_si[proc] = 2*(nrows+1);
3717       len += len_si[proc];
3718     }
3719   }
3720 
3721   /* determine the number and length of messages to receive for ij-structure */
3722   /*-------------------------------------------------------------------------*/
3723   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3724   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3725 
3726   /* post the Irecv of j-structure */
3727   /*-------------------------------*/
3728   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3729   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3730 
3731   /* post the Isend of j-structure */
3732   /*--------------------------------*/
3733   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3734   sj_waits = si_waits + merge->nsend;
3735 
3736   for (proc=0, k=0; proc<size; proc++){
3737     if (!len_s[proc]) continue;
3738     i = owners[proc];
3739     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3740     k++;
3741   }
3742 
3743   /* receives and sends of j-structure are complete */
3744   /*------------------------------------------------*/
3745   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3746   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3747 
3748   /* send and recv i-structure */
3749   /*---------------------------*/
3750   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3751   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3752 
3753   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3754   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3755   for (proc=0,k=0; proc<size; proc++){
3756     if (!len_s[proc]) continue;
3757     /* form outgoing message for i-structure:
3758          buf_si[0]:                 nrows to be sent
3759                [1:nrows]:           row index (global)
3760                [nrows+1:2*nrows+1]: i-structure index
3761     */
3762     /*-------------------------------------------*/
3763     nrows = len_si[proc]/2 - 1;
3764     buf_si_i    = buf_si + nrows+1;
3765     buf_si[0]   = nrows;
3766     buf_si_i[0] = 0;
3767     nrows = 0;
3768     for (i=owners[proc]; i<owners[proc+1]; i++){
3769       anzi = ai[i+1] - ai[i];
3770       if (anzi) {
3771         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3772         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3773         nrows++;
3774       }
3775     }
3776     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3777     k++;
3778     buf_si += len_si[proc];
3779   }
3780 
3781   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3782   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3783 
3784   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3785   for (i=0; i<merge->nrecv; i++){
3786     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3787   }
3788 
3789   ierr = PetscFree(len_si);CHKERRQ(ierr);
3790   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3791   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3792   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3793   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3794   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3795   ierr = PetscFree(status);CHKERRQ(ierr);
3796 
3797   /* compute a local seq matrix in each processor */
3798   /*----------------------------------------------*/
3799   /* allocate bi array and free space for accumulating nonzero column info */
3800   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3801   bi[0] = 0;
3802 
3803   /* create and initialize a linked list */
3804   nlnk = N+1;
3805   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3806 
3807   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3808   len = 0;
3809   len  = ai[owners[rank+1]] - ai[owners[rank]];
3810   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3811   current_space = free_space;
3812 
3813   /* determine symbolic info for each local row */
3814   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3815   nextrow = buf_ri_k + merge->nrecv;
3816   nextai  = nextrow + merge->nrecv;
3817   for (k=0; k<merge->nrecv; k++){
3818     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3819     nrows = *buf_ri_k[k];
3820     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3821     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3822   }
3823 
3824   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3825   len = 0;
3826   for (i=0;i<m;i++) {
3827     bnzi   = 0;
3828     /* add local non-zero cols of this proc's seqmat into lnk */
3829     arow   = owners[rank] + i;
3830     anzi   = ai[arow+1] - ai[arow];
3831     aj     = a->j + ai[arow];
3832     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3833     bnzi += nlnk;
3834     /* add received col data into lnk */
3835     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3836       if (i == *nextrow[k]) { /* i-th row */
3837         anzi = *(nextai[k]+1) - *nextai[k];
3838         aj   = buf_rj[k] + *nextai[k];
3839         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3840         bnzi += nlnk;
3841         nextrow[k]++; nextai[k]++;
3842       }
3843     }
3844     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3845 
3846     /* if free space is not available, make more free space */
3847     if (current_space->local_remaining<bnzi) {
3848       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3849       nspacedouble++;
3850     }
3851     /* copy data into free space, then initialize lnk */
3852     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3853     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3854 
3855     current_space->array           += bnzi;
3856     current_space->local_used      += bnzi;
3857     current_space->local_remaining -= bnzi;
3858 
3859     bi[i+1] = bi[i] + bnzi;
3860   }
3861 
3862   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3863 
3864   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3865   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3866   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3867 
3868   /* create symbolic parallel matrix B_mpi */
3869   /*---------------------------------------*/
3870   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3871   if (n==PETSC_DECIDE) {
3872     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3873   } else {
3874     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3875   }
3876   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3877   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3878   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3879 
3880   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3881   B_mpi->assembled     = PETSC_FALSE;
3882   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3883   merge->bi            = bi;
3884   merge->bj            = bj;
3885   merge->buf_ri        = buf_ri;
3886   merge->buf_rj        = buf_rj;
3887   merge->coi           = PETSC_NULL;
3888   merge->coj           = PETSC_NULL;
3889   merge->owners_co     = PETSC_NULL;
3890 
3891   /* attach the supporting struct to B_mpi for reuse */
3892   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3893   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
3894   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3895   *mpimat = B_mpi;
3896 
3897   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3898   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3899   PetscFunctionReturn(0);
3900 }
3901 
3902 static PetscEvent logkey_seqstompi = 0;
3903 #undef __FUNCT__
3904 #define __FUNCT__ "MatMerge_SeqsToMPI"
3905 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3906 {
3907   PetscErrorCode   ierr;
3908 
3909   PetscFunctionBegin;
3910   if (!logkey_seqstompi) {
3911     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3912   }
3913   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3914   if (scall == MAT_INITIAL_MATRIX){
3915     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3916   }
3917   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3918   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3919   PetscFunctionReturn(0);
3920 }
3921 static PetscEvent logkey_getlocalmat = 0;
3922 #undef __FUNCT__
3923 #define __FUNCT__ "MatGetLocalMat"
3924 /*@C
3925      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3926 
3927     Not Collective
3928 
3929    Input Parameters:
3930 +    A - the matrix
3931 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3932 
3933    Output Parameter:
3934 .    A_loc - the local sequential matrix generated
3935 
3936     Level: developer
3937 
3938 @*/
3939 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3940 {
3941   PetscErrorCode  ierr;
3942   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3943   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3944   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3945   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3946   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3947   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3948 
3949   PetscFunctionBegin;
3950   if (!logkey_getlocalmat) {
3951     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3952   }
3953   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3954   if (scall == MAT_INITIAL_MATRIX){
3955     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3956     ci[0] = 0;
3957     for (i=0; i<am; i++){
3958       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3959     }
3960     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3961     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3962     k = 0;
3963     for (i=0; i<am; i++) {
3964       ncols_o = bi[i+1] - bi[i];
3965       ncols_d = ai[i+1] - ai[i];
3966       /* off-diagonal portion of A */
3967       for (jo=0; jo<ncols_o; jo++) {
3968         col = cmap[*bj];
3969         if (col >= cstart) break;
3970         cj[k]   = col; bj++;
3971         ca[k++] = *ba++;
3972       }
3973       /* diagonal portion of A */
3974       for (j=0; j<ncols_d; j++) {
3975         cj[k]   = cstart + *aj++;
3976         ca[k++] = *aa++;
3977       }
3978       /* off-diagonal portion of A */
3979       for (j=jo; j<ncols_o; j++) {
3980         cj[k]   = cmap[*bj++];
3981         ca[k++] = *ba++;
3982       }
3983     }
3984     /* put together the new matrix */
3985     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3986     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3987     /* Since these are PETSc arrays, change flags to free them as necessary. */
3988     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
3989     mat->free_a  = PETSC_TRUE;
3990     mat->free_ij = PETSC_TRUE;
3991     mat->nonew   = 0;
3992   } else if (scall == MAT_REUSE_MATRIX){
3993     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3994     ci = mat->i; cj = mat->j; ca = mat->a;
3995     for (i=0; i<am; i++) {
3996       /* off-diagonal portion of A */
3997       ncols_o = bi[i+1] - bi[i];
3998       for (jo=0; jo<ncols_o; jo++) {
3999         col = cmap[*bj];
4000         if (col >= cstart) break;
4001         *ca++ = *ba++; bj++;
4002       }
4003       /* diagonal portion of A */
4004       ncols_d = ai[i+1] - ai[i];
4005       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
4006       /* off-diagonal portion of A */
4007       for (j=jo; j<ncols_o; j++) {
4008         *ca++ = *ba++; bj++;
4009       }
4010     }
4011   } else {
4012     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
4013   }
4014 
4015   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
4016   PetscFunctionReturn(0);
4017 }
4018 
4019 static PetscEvent logkey_getlocalmatcondensed = 0;
4020 #undef __FUNCT__
4021 #define __FUNCT__ "MatGetLocalMatCondensed"
4022 /*@C
4023      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
4024 
4025     Not Collective
4026 
4027    Input Parameters:
4028 +    A - the matrix
4029 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4030 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
4031 
4032    Output Parameter:
4033 .    A_loc - the local sequential matrix generated
4034 
4035     Level: developer
4036 
4037 @*/
4038 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
4039 {
4040   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4041   PetscErrorCode    ierr;
4042   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
4043   IS                isrowa,iscola;
4044   Mat               *aloc;
4045 
4046   PetscFunctionBegin;
4047   if (!logkey_getlocalmatcondensed) {
4048     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
4049   }
4050   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4051   if (!row){
4052     start = A->rmap.rstart; end = A->rmap.rend;
4053     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
4054   } else {
4055     isrowa = *row;
4056   }
4057   if (!col){
4058     start = A->cmap.rstart;
4059     cmap  = a->garray;
4060     nzA   = a->A->cmap.n;
4061     nzB   = a->B->cmap.n;
4062     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4063     ncols = 0;
4064     for (i=0; i<nzB; i++) {
4065       if (cmap[i] < start) idx[ncols++] = cmap[i];
4066       else break;
4067     }
4068     imark = i;
4069     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
4070     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
4071     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
4072     ierr = PetscFree(idx);CHKERRQ(ierr);
4073   } else {
4074     iscola = *col;
4075   }
4076   if (scall != MAT_INITIAL_MATRIX){
4077     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
4078     aloc[0] = *A_loc;
4079   }
4080   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
4081   *A_loc = aloc[0];
4082   ierr = PetscFree(aloc);CHKERRQ(ierr);
4083   if (!row){
4084     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
4085   }
4086   if (!col){
4087     ierr = ISDestroy(iscola);CHKERRQ(ierr);
4088   }
4089   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
4090   PetscFunctionReturn(0);
4091 }
4092 
4093 static PetscEvent logkey_GetBrowsOfAcols = 0;
4094 #undef __FUNCT__
4095 #define __FUNCT__ "MatGetBrowsOfAcols"
4096 /*@C
4097     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
4098 
4099     Collective on Mat
4100 
4101    Input Parameters:
4102 +    A,B - the matrices in mpiaij format
4103 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4104 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
4105 
4106    Output Parameter:
4107 +    rowb, colb - index sets of rows and columns of B to extract
4108 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
4109 -    B_seq - the sequential matrix generated
4110 
4111     Level: developer
4112 
4113 @*/
4114 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
4115 {
4116   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
4117   PetscErrorCode    ierr;
4118   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
4119   IS                isrowb,iscolb;
4120   Mat               *bseq;
4121 
4122   PetscFunctionBegin;
4123   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4124     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4125   }
4126   if (!logkey_GetBrowsOfAcols) {
4127     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
4128   }
4129   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4130 
4131   if (scall == MAT_INITIAL_MATRIX){
4132     start = A->cmap.rstart;
4133     cmap  = a->garray;
4134     nzA   = a->A->cmap.n;
4135     nzB   = a->B->cmap.n;
4136     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
4137     ncols = 0;
4138     for (i=0; i<nzB; i++) {  /* row < local row index */
4139       if (cmap[i] < start) idx[ncols++] = cmap[i];
4140       else break;
4141     }
4142     imark = i;
4143     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
4144     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
4145     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
4146     ierr = PetscFree(idx);CHKERRQ(ierr);
4147     *brstart = imark;
4148     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
4149   } else {
4150     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
4151     isrowb = *rowb; iscolb = *colb;
4152     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
4153     bseq[0] = *B_seq;
4154   }
4155   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
4156   *B_seq = bseq[0];
4157   ierr = PetscFree(bseq);CHKERRQ(ierr);
4158   if (!rowb){
4159     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
4160   } else {
4161     *rowb = isrowb;
4162   }
4163   if (!colb){
4164     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
4165   } else {
4166     *colb = iscolb;
4167   }
4168   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
4169   PetscFunctionReturn(0);
4170 }
4171 
4172 static PetscEvent logkey_GetBrowsOfAocols = 0;
4173 #undef __FUNCT__
4174 #define __FUNCT__ "MatGetBrowsOfAoCols"
4175 /*@C
4176     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
4177     of the OFF-DIAGONAL portion of local A
4178 
4179     Collective on Mat
4180 
4181    Input Parameters:
4182 +    A,B - the matrices in mpiaij format
4183 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4184 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
4185 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
4186 
4187    Output Parameter:
4188 +    B_oth - the sequential matrix generated
4189 
4190     Level: developer
4191 
4192 @*/
4193 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
4194 {
4195   VecScatter_MPI_General *gen_to,*gen_from;
4196   PetscErrorCode         ierr;
4197   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
4198   Mat_SeqAIJ             *b_oth;
4199   VecScatter             ctx=a->Mvctx;
4200   MPI_Comm               comm=ctx->comm;
4201   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
4202   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
4203   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
4204   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
4205   MPI_Request            *rwaits = PETSC_NULL,*swaits = PETSC_NULL;
4206   MPI_Status             *sstatus,rstatus;
4207   PetscInt               *cols,sbs,rbs;
4208   PetscScalar            *vals;
4209 
4210   PetscFunctionBegin;
4211   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
4212     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
4213   }
4214   if (!logkey_GetBrowsOfAocols) {
4215     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
4216   }
4217   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4218   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4219 
4220   gen_to   = (VecScatter_MPI_General*)ctx->todata;
4221   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
4222   rvalues  = gen_from->values; /* holds the length of receiving row */
4223   svalues  = gen_to->values;   /* holds the length of sending row */
4224   nrecvs   = gen_from->n;
4225   nsends   = gen_to->n;
4226 
4227   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
4228   srow     = gen_to->indices;   /* local row index to be sent */
4229   sstarts  = gen_to->starts;
4230   sprocs   = gen_to->procs;
4231   sstatus  = gen_to->sstatus;
4232   sbs      = gen_to->bs;
4233   rstarts  = gen_from->starts;
4234   rprocs   = gen_from->procs;
4235   rbs      = gen_from->bs;
4236 
4237   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
4238   if (scall == MAT_INITIAL_MATRIX){
4239     /* i-array */
4240     /*---------*/
4241     /*  post receives */
4242     for (i=0; i<nrecvs; i++){
4243       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4244       nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
4245       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4246     }
4247 
4248     /* pack the outgoing message */
4249     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
4250     rstartsj = sstartsj + nsends +1;
4251     sstartsj[0] = 0;  rstartsj[0] = 0;
4252     len = 0; /* total length of j or a array to be sent */
4253     k = 0;
4254     for (i=0; i<nsends; i++){
4255       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
4256       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4257       for (j=0; j<nrows; j++) {
4258         row = srow[k] + B->rmap.range[rank]; /* global row idx */
4259         for (l=0; l<sbs; l++){
4260           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
4261           rowlen[j*sbs+l] = ncols;
4262           len += ncols;
4263           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
4264         }
4265         k++;
4266       }
4267       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4268       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
4269     }
4270     /* recvs and sends of i-array are completed */
4271     i = nrecvs;
4272     while (i--) {
4273       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4274     }
4275     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4276 
4277     /* allocate buffers for sending j and a arrays */
4278     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
4279     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
4280 
4281     /* create i-array of B_oth */
4282     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
4283     b_othi[0] = 0;
4284     len = 0; /* total length of j or a array to be received */
4285     k = 0;
4286     for (i=0; i<nrecvs; i++){
4287       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
4288       nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
4289       for (j=0; j<nrows; j++) {
4290         b_othi[k+1] = b_othi[k] + rowlen[j];
4291         len += rowlen[j]; k++;
4292       }
4293       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
4294     }
4295 
4296     /* allocate space for j and a arrrays of B_oth */
4297     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
4298     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
4299 
4300     /* j-array */
4301     /*---------*/
4302     /*  post receives of j-array */
4303     for (i=0; i<nrecvs; i++){
4304       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4305       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4306     }
4307 
4308     /* pack the outgoing message j-array */
4309     k = 0;
4310     for (i=0; i<nsends; i++){
4311       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4312       bufJ = bufj+sstartsj[i];
4313       for (j=0; j<nrows; j++) {
4314         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4315         for (ll=0; ll<sbs; ll++){
4316           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4317           for (l=0; l<ncols; l++){
4318             *bufJ++ = cols[l];
4319           }
4320           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
4321         }
4322       }
4323       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4324     }
4325 
4326     /* recvs and sends of j-array are completed */
4327     i = nrecvs;
4328     while (i--) {
4329       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4330     }
4331     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4332   } else if (scall == MAT_REUSE_MATRIX){
4333     sstartsj = *startsj;
4334     rstartsj = sstartsj + nsends +1;
4335     bufa     = *bufa_ptr;
4336     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
4337     b_otha   = b_oth->a;
4338   } else {
4339     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
4340   }
4341 
4342   /* a-array */
4343   /*---------*/
4344   /*  post receives of a-array */
4345   for (i=0; i<nrecvs; i++){
4346     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
4347     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
4348   }
4349 
4350   /* pack the outgoing message a-array */
4351   k = 0;
4352   for (i=0; i<nsends; i++){
4353     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
4354     bufA = bufa+sstartsj[i];
4355     for (j=0; j<nrows; j++) {
4356       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
4357       for (ll=0; ll<sbs; ll++){
4358         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4359         for (l=0; l<ncols; l++){
4360           *bufA++ = vals[l];
4361         }
4362         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
4363       }
4364     }
4365     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
4366   }
4367   /* recvs and sends of a-array are completed */
4368   i = nrecvs;
4369   while (i--) {
4370     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
4371   }
4372   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
4373   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
4374 
4375   if (scall == MAT_INITIAL_MATRIX){
4376     /* put together the new matrix */
4377     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
4378 
4379     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4380     /* Since these are PETSc arrays, change flags to free them as necessary. */
4381     b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
4382     b_oth->free_a  = PETSC_TRUE;
4383     b_oth->free_ij = PETSC_TRUE;
4384     b_oth->nonew   = 0;
4385 
4386     ierr = PetscFree(bufj);CHKERRQ(ierr);
4387     if (!startsj || !bufa_ptr){
4388       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
4389       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
4390     } else {
4391       *startsj  = sstartsj;
4392       *bufa_ptr = bufa;
4393     }
4394   }
4395   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
4396   PetscFunctionReturn(0);
4397 }
4398 
4399 #undef __FUNCT__
4400 #define __FUNCT__ "MatGetCommunicationStructs"
4401 /*@C
4402   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
4403 
4404   Not Collective
4405 
4406   Input Parameters:
4407 . A - The matrix in mpiaij format
4408 
4409   Output Parameter:
4410 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
4411 . colmap - A map from global column index to local index into lvec
4412 - multScatter - A scatter from the argument of a matrix-vector product to lvec
4413 
4414   Level: developer
4415 
4416 @*/
4417 #if defined (PETSC_USE_CTABLE)
4418 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
4419 #else
4420 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
4421 #endif
4422 {
4423   Mat_MPIAIJ *a;
4424 
4425   PetscFunctionBegin;
4426   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
4427   PetscValidPointer(lvec, 2)
4428   PetscValidPointer(colmap, 3)
4429   PetscValidPointer(multScatter, 4)
4430   a = (Mat_MPIAIJ *) A->data;
4431   if (lvec) *lvec = a->lvec;
4432   if (colmap) *colmap = a->colmap;
4433   if (multScatter) *multScatter = a->Mvctx;
4434   PetscFunctionReturn(0);
4435 }
4436 
4437 EXTERN_C_BEGIN
4438 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICRL(Mat,MatType,MatReuse,Mat*);
4439 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIAIJ_MPICSRPERM(Mat,MatType,MatReuse,Mat*);
4440 EXTERN_C_END
4441 
4442 /*MC
4443    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
4444 
4445    Options Database Keys:
4446 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
4447 
4448   Level: beginner
4449 
4450 .seealso: MatCreateMPIAIJ
4451 M*/
4452 
4453 EXTERN_C_BEGIN
4454 #undef __FUNCT__
4455 #define __FUNCT__ "MatCreate_MPIAIJ"
4456 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
4457 {
4458   Mat_MPIAIJ     *b;
4459   PetscErrorCode ierr;
4460   PetscMPIInt    size;
4461 
4462   PetscFunctionBegin;
4463   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
4464 
4465   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
4466   B->data         = (void*)b;
4467   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
4468   B->factor       = 0;
4469   B->rmap.bs      = 1;
4470   B->assembled    = PETSC_FALSE;
4471   B->mapping      = 0;
4472 
4473   B->insertmode      = NOT_SET_VALUES;
4474   b->size            = size;
4475   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
4476 
4477   /* build cache for off array entries formed */
4478   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
4479   b->donotstash  = PETSC_FALSE;
4480   b->colmap      = 0;
4481   b->garray      = 0;
4482   b->roworiented = PETSC_TRUE;
4483 
4484   /* stuff used for matrix vector multiply */
4485   b->lvec      = PETSC_NULL;
4486   b->Mvctx     = PETSC_NULL;
4487 
4488   /* stuff for MatGetRow() */
4489   b->rowindices   = 0;
4490   b->rowvalues    = 0;
4491   b->getrowactive = PETSC_FALSE;
4492 
4493 
4494   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
4495                                      "MatStoreValues_MPIAIJ",
4496                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
4497   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
4498                                      "MatRetrieveValues_MPIAIJ",
4499                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
4500   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
4501 				     "MatGetDiagonalBlock_MPIAIJ",
4502                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
4503   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
4504 				     "MatIsTranspose_MPIAIJ",
4505 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
4506   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
4507 				     "MatMPIAIJSetPreallocation_MPIAIJ",
4508 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
4509   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
4510 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4511 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4512   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4513 				     "MatDiagonalScaleLocal_MPIAIJ",
4514 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4515   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicsrperm_C",
4516                                      "MatConvert_MPIAIJ_MPICSRPERM",
4517                                       MatConvert_MPIAIJ_MPICSRPERM);CHKERRQ(ierr);
4518   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_mpicrl_C",
4519                                      "MatConvert_MPIAIJ_MPICRL",
4520                                       MatConvert_MPIAIJ_MPICRL);CHKERRQ(ierr);
4521   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
4522   PetscFunctionReturn(0);
4523 }
4524 EXTERN_C_END
4525 
4526 #undef __FUNCT__
4527 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
4528 /*@C
4529      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
4530          and "off-diagonal" part of the matrix in CSR format.
4531 
4532    Collective on MPI_Comm
4533 
4534    Input Parameters:
4535 +  comm - MPI communicator
4536 .  m - number of local rows (Cannot be PETSC_DECIDE)
4537 .  n - This value should be the same as the local size used in creating the
4538        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4539        calculated if N is given) For square matrices n is almost always m.
4540 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4541 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4542 .   i - row indices for "diagonal" portion of matrix
4543 .   j - column indices
4544 .   a - matrix values
4545 .   oi - row indices for "off-diagonal" portion of matrix
4546 .   oj - column indices
4547 -   oa - matrix values
4548 
4549    Output Parameter:
4550 .   mat - the matrix
4551 
4552    Level: advanced
4553 
4554    Notes:
4555        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc.
4556 
4557        The i and j indices are 0 based
4558 
4559        See MatCreateMPIAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
4560 
4561 
4562 .keywords: matrix, aij, compressed row, sparse, parallel
4563 
4564 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4565           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithArrays()
4566 @*/
4567 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],
4568 								PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
4569 {
4570   PetscErrorCode ierr;
4571   Mat_MPIAIJ     *maij;
4572 
4573  PetscFunctionBegin;
4574   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4575   if (i[0]) {
4576     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4577   }
4578   if (oi[0]) {
4579     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
4580   }
4581   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4582   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4583   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4584   maij = (Mat_MPIAIJ*) (*mat)->data;
4585   maij->donotstash     = PETSC_TRUE;
4586   (*mat)->preallocated = PETSC_TRUE;
4587 
4588   (*mat)->rmap.bs = (*mat)->cmap.bs = 1;
4589   ierr = PetscMapInitialize((*mat)->comm,&(*mat)->rmap);CHKERRQ(ierr);
4590   ierr = PetscMapInitialize((*mat)->comm,&(*mat)->cmap);CHKERRQ(ierr);
4591 
4592   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
4593   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap.N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
4594 
4595   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4596   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4597   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4598   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4599 
4600   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4601   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4602   PetscFunctionReturn(0);
4603 }
4604 
4605 /*
4606     Special version for direct calls from Fortran
4607 */
4608 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4609 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4610 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4611 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4612 #endif
4613 
4614 /* Change these macros so can be used in void function */
4615 #undef CHKERRQ
4616 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4617 #undef SETERRQ2
4618 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4619 #undef SETERRQ
4620 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4621 
4622 EXTERN_C_BEGIN
4623 #undef __FUNCT__
4624 #define __FUNCT__ "matsetvaluesmpiaij_"
4625 void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
4626 {
4627   Mat            mat = *mmat;
4628   PetscInt       m = *mm, n = *mn;
4629   InsertMode     addv = *maddv;
4630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4631   PetscScalar    value;
4632   PetscErrorCode ierr;
4633 
4634   MatPreallocated(mat);
4635   if (mat->insertmode == NOT_SET_VALUES) {
4636     mat->insertmode = addv;
4637   }
4638 #if defined(PETSC_USE_DEBUG)
4639   else if (mat->insertmode != addv) {
4640     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4641   }
4642 #endif
4643   {
4644   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4645   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4646   PetscTruth     roworiented = aij->roworiented;
4647 
4648   /* Some Variables required in the macro */
4649   Mat            A = aij->A;
4650   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4651   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4652   PetscScalar    *aa = a->a;
4653   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4654   Mat            B = aij->B;
4655   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4656   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4657   PetscScalar    *ba = b->a;
4658 
4659   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4660   PetscInt       nonew = a->nonew;
4661   PetscScalar    *ap1,*ap2;
4662 
4663   PetscFunctionBegin;
4664   for (i=0; i<m; i++) {
4665     if (im[i] < 0) continue;
4666 #if defined(PETSC_USE_DEBUG)
4667     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4668 #endif
4669     if (im[i] >= rstart && im[i] < rend) {
4670       row      = im[i] - rstart;
4671       lastcol1 = -1;
4672       rp1      = aj + ai[row];
4673       ap1      = aa + ai[row];
4674       rmax1    = aimax[row];
4675       nrow1    = ailen[row];
4676       low1     = 0;
4677       high1    = nrow1;
4678       lastcol2 = -1;
4679       rp2      = bj + bi[row];
4680       ap2      = ba + bi[row];
4681       rmax2    = bimax[row];
4682       nrow2    = bilen[row];
4683       low2     = 0;
4684       high2    = nrow2;
4685 
4686       for (j=0; j<n; j++) {
4687         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4688         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4689         if (in[j] >= cstart && in[j] < cend){
4690           col = in[j] - cstart;
4691           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4692         } else if (in[j] < 0) continue;
4693 #if defined(PETSC_USE_DEBUG)
4694         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4695 #endif
4696         else {
4697           if (mat->was_assembled) {
4698             if (!aij->colmap) {
4699               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4700             }
4701 #if defined (PETSC_USE_CTABLE)
4702             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4703 	    col--;
4704 #else
4705             col = aij->colmap[in[j]] - 1;
4706 #endif
4707             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4708               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4709               col =  in[j];
4710               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4711               B = aij->B;
4712               b = (Mat_SeqAIJ*)B->data;
4713               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4714               rp2      = bj + bi[row];
4715               ap2      = ba + bi[row];
4716               rmax2    = bimax[row];
4717               nrow2    = bilen[row];
4718               low2     = 0;
4719               high2    = nrow2;
4720               bm       = aij->B->rmap.n;
4721               ba = b->a;
4722             }
4723           } else col = in[j];
4724           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4725         }
4726       }
4727     } else {
4728       if (!aij->donotstash) {
4729         if (roworiented) {
4730           if (ignorezeroentries && v[i*n] == 0.0) continue;
4731           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4732         } else {
4733           if (ignorezeroentries && v[i] == 0.0) continue;
4734           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4735         }
4736       }
4737     }
4738   }}
4739   PetscFunctionReturnVoid();
4740 }
4741 EXTERN_C_END
4742 
4743