xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 9123193a5494c007fc6bb70ddf14e0e13b58080a)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it it is not scalable (each processor
11 has an order N integer array but is fast to acess.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatGetValues_MPIAIJ"
241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
242 {
243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
244   PetscErrorCode ierr;
245   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
246   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
247 
248   PetscFunctionBegin;
249   for (i=0; i<m; i++) {
250     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
251     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
252     if (idxm[i] >= rstart && idxm[i] < rend) {
253       row = idxm[i] - rstart;
254       for (j=0; j<n; j++) {
255         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
256         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
257         if (idxn[j] >= cstart && idxn[j] < cend){
258           col = idxn[j] - cstart;
259           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
260         } else {
261           if (!aij->colmap) {
262             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
263           }
264 #if defined (PETSC_USE_CTABLE)
265           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
266           col --;
267 #else
268           col = aij->colmap[idxn[j]] - 1;
269 #endif
270           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
271           else {
272             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
273           }
274         }
275       }
276     } else {
277       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
278     }
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   if (aij->donotstash) {
294     PetscFunctionReturn(0);
295   }
296 
297   /* make sure all processors are either in INSERTMODE or ADDMODE */
298   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
299   if (addv == (ADD_VALUES|INSERT_VALUES)) {
300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
301   }
302   mat->insertmode = addv; /* in case this processor had no cache */
303 
304   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
305   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
306   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
313 {
314   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
315   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
316   PetscErrorCode ierr;
317   PetscMPIInt    n;
318   PetscInt       i,j,rstart,ncols,flg;
319   PetscInt       *row,*col,other_disassembled;
320   PetscScalar    *val;
321   InsertMode     addv = mat->insertmode;
322 
323   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324   PetscFunctionBegin;
325   if (!aij->donotstash) {
326     while (1) {
327       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
328       if (!flg) break;
329 
330       for (i=0; i<n;) {
331         /* Now identify the consecutive vals belonging to the same row */
332         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
333         if (j < n) ncols = j-i;
334         else       ncols = n-i;
335         /* Now assemble all these values with a single function call */
336         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
337         i = j;
338       }
339     }
340     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
341   }
342   a->compressedrow.use     = PETSC_FALSE;
343   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
345 
346   /* determine if any processor has disassembled, if so we must
347      also disassemble ourselfs, in order that we may reassemble. */
348   /*
349      if nonzero structure of submatrix B cannot change then we know that
350      no processor disassembled thus we can skip this stuff
351   */
352   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
353     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
354     if (mat->was_assembled && !other_disassembled) {
355       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
356     }
357   }
358   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
359     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
360   }
361   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
362   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
363   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
364   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
365 
366   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
367   aij->rowvalues = 0;
368 
369   /* used by MatAXPY() */
370   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
371   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
372 
373   PetscFunctionReturn(0);
374 }
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
379 {
380   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
381   PetscErrorCode ierr;
382 
383   PetscFunctionBegin;
384   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
385   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatZeroRows_MPIAIJ"
391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
392 {
393   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
394   PetscErrorCode ierr;
395   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
396   PetscInt       i,*owners = A->rmap.range;
397   PetscInt       *nprocs,j,idx,nsends,row;
398   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
399   PetscInt       *rvalues,count,base,slen,*source;
400   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
401   MPI_Comm       comm = A->comm;
402   MPI_Request    *send_waits,*recv_waits;
403   MPI_Status     recv_status,*send_status;
404 #if defined(PETSC_DEBUG)
405   PetscTruth     found = PETSC_FALSE;
406 #endif
407 
408   PetscFunctionBegin;
409   /*  first count number of contributors to each processor */
410   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
411   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
412   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
413   j = 0;
414   for (i=0; i<N; i++) {
415     if (lastidx > (idx = rows[i])) j = 0;
416     lastidx = idx;
417     for (; j<size; j++) {
418       if (idx >= owners[j] && idx < owners[j+1]) {
419         nprocs[2*j]++;
420         nprocs[2*j+1] = 1;
421         owner[i] = j;
422 #if defined(PETSC_DEBUG)
423         found = PETSC_TRUE;
424 #endif
425         break;
426       }
427     }
428 #if defined(PETSC_DEBUG)
429     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
430     found = PETSC_FALSE;
431 #endif
432   }
433   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434 
435   /* inform other processors of number of messages and max length*/
436   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
437 
438   /* post receives:   */
439   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
440   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
441   for (i=0; i<nrecvs; i++) {
442     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
443   }
444 
445   /* do sends:
446       1) starts[i] gives the starting index in svalues for stuff going to
447          the ith processor
448   */
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
450   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
451   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
452   starts[0] = 0;
453   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
454   for (i=0; i<N; i++) {
455     svalues[starts[owner[i]]++] = rows[i];
456   }
457 
458   starts[0] = 0;
459   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
460   count = 0;
461   for (i=0; i<size; i++) {
462     if (nprocs[2*i+1]) {
463       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
464     }
465   }
466   ierr = PetscFree(starts);CHKERRQ(ierr);
467 
468   base = owners[rank];
469 
470   /*  wait on receives */
471   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
472   source = lens + nrecvs;
473   count  = nrecvs; slen = 0;
474   while (count) {
475     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
476     /* unpack receives into our local space */
477     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
478     source[imdex]  = recv_status.MPI_SOURCE;
479     lens[imdex]    = n;
480     slen          += n;
481     count--;
482   }
483   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
484 
485   /* move the data into the send scatter */
486   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
487   count = 0;
488   for (i=0; i<nrecvs; i++) {
489     values = rvalues + i*nmax;
490     for (j=0; j<lens[i]; j++) {
491       lrows[count++] = values[j] - base;
492     }
493   }
494   ierr = PetscFree(rvalues);CHKERRQ(ierr);
495   ierr = PetscFree(lens);CHKERRQ(ierr);
496   ierr = PetscFree(owner);CHKERRQ(ierr);
497   ierr = PetscFree(nprocs);CHKERRQ(ierr);
498 
499   /* actually zap the local rows */
500   /*
501         Zero the required rows. If the "diagonal block" of the matrix
502      is square and the user wishes to set the diagonal we use separate
503      code so that MatSetValues() is not called for each diagonal allocating
504      new memory, thus calling lots of mallocs and slowing things down.
505 
506        Contributed by: Matthew Knepley
507   */
508   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
509   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
510   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
511     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
512   } else if (diag != 0.0) {
513     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
514     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
515       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
517     }
518     for (i = 0; i < slen; i++) {
519       row  = lrows[i] + rstart;
520       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
521     }
522     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
524   } else {
525     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
526   }
527   ierr = PetscFree(lrows);CHKERRQ(ierr);
528 
529   /* wait on sends */
530   if (nsends) {
531     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
532     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
533     ierr = PetscFree(send_status);CHKERRQ(ierr);
534   }
535   ierr = PetscFree(send_waits);CHKERRQ(ierr);
536   ierr = PetscFree(svalues);CHKERRQ(ierr);
537 
538   PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMult_MPIAIJ"
543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
544 {
545   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
546   PetscErrorCode ierr;
547   PetscInt       nt;
548 
549   PetscFunctionBegin;
550   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
551   if (nt != A->cmap.n) {
552     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
553   }
554   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
555   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
556   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultAdd_MPIAIJ"
563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567 
568   PetscFunctionBegin;
569   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
570   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
571   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
572   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
579 {
580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
581   PetscErrorCode ierr;
582   PetscTruth     merged;
583 
584   PetscFunctionBegin;
585   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
586   /* do nondiagonal part */
587   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
588   if (!merged) {
589     /* send it on its way */
590     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
591     /* do local part */
592     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
593     /* receive remote parts: note this assumes the values are not actually */
594     /* added in yy until the next line, */
595     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
596   } else {
597     /* do local part */
598     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
599     /* send it on its way */
600     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
601     /* values actually were received in the Begin() but we need to call this nop */
602     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
611 {
612   MPI_Comm       comm;
613   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
614   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
615   IS             Me,Notme;
616   PetscErrorCode ierr;
617   PetscInt       M,N,first,last,*notme,i;
618   PetscMPIInt    size;
619 
620   PetscFunctionBegin;
621 
622   /* Easy test: symmetric diagonal block */
623   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
624   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
625   if (!*f) PetscFunctionReturn(0);
626   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
628   if (size == 1) PetscFunctionReturn(0);
629 
630   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
631   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
632   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
633   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
634   for (i=0; i<first; i++) notme[i] = i;
635   for (i=last; i<M; i++) notme[i-last+first] = i;
636   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
637   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
638   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
639   Aoff = Aoffs[0];
640   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
641   Boff = Boffs[0];
642   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
644   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
645   ierr = ISDestroy(Me);CHKERRQ(ierr);
646   ierr = ISDestroy(Notme);CHKERRQ(ierr);
647 
648   PetscFunctionReturn(0);
649 }
650 EXTERN_C_END
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
655 {
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657   PetscErrorCode ierr;
658 
659   PetscFunctionBegin;
660   /* do nondiagonal part */
661   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
662   /* send it on its way */
663   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
664   /* do local part */
665   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
666   /* receive remote parts */
667   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672   This only works correctly for square matrices where the subblock A->A is the
673    diagonal block
674 */
675 #undef __FUNCT__
676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
678 {
679   PetscErrorCode ierr;
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681 
682   PetscFunctionBegin;
683   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
684   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
685     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
686   }
687   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatScale_MPIAIJ"
693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
694 {
695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
700   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatDestroy_MPIAIJ"
706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
707 {
708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
709   PetscErrorCode ierr;
710 
711   PetscFunctionBegin;
712 #if defined(PETSC_USE_LOG)
713   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
714 #endif
715   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
717   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
718 #if defined (PETSC_USE_CTABLE)
719   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
720 #else
721   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
722 #endif
723   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
724   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
725   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
726   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMult_mpiaij_mpidense_C","",PETSC_NULL);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatView_MPIAIJ_Binary"
742 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
743 {
744   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
745   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
746   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
747   PetscErrorCode    ierr;
748   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
749   int               fd;
750   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
751   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
752   PetscScalar       *column_values;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
756   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
757   nz   = A->nz + B->nz;
758   if (!rank) {
759     header[0] = MAT_FILE_COOKIE;
760     header[1] = mat->rmap.N;
761     header[2] = mat->cmap.N;
762     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
763     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
764     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
765     /* get largest number of rows any processor has */
766     rlen = mat->rmap.n;
767     range = mat->rmap.range;
768     for (i=1; i<size; i++) {
769       rlen = PetscMax(rlen,range[i+1] - range[i]);
770     }
771   } else {
772     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
773     rlen = mat->rmap.n;
774   }
775 
776   /* load up the local row counts */
777   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
778   for (i=0; i<mat->rmap.n; i++) {
779     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
780   }
781 
782   /* store the row lengths to the file */
783   if (!rank) {
784     MPI_Status status;
785     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
786     for (i=1; i<size; i++) {
787       rlen = range[i+1] - range[i];
788       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
789       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
790     }
791   } else {
792     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
793   }
794   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
795 
796   /* load up the local column indices */
797   nzmax = nz; /* )th processor needs space a largest processor needs */
798   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
799   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
800   cnt  = 0;
801   for (i=0; i<mat->rmap.n; i++) {
802     for (j=B->i[i]; j<B->i[i+1]; j++) {
803       if ( (col = garray[B->j[j]]) > cstart) break;
804       column_indices[cnt++] = col;
805     }
806     for (k=A->i[i]; k<A->i[i+1]; k++) {
807       column_indices[cnt++] = A->j[k] + cstart;
808     }
809     for (; j<B->i[i+1]; j++) {
810       column_indices[cnt++] = garray[B->j[j]];
811     }
812   }
813   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
814 
815   /* store the column indices to the file */
816   if (!rank) {
817     MPI_Status status;
818     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
819     for (i=1; i<size; i++) {
820       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
821       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
822       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
823       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
824     }
825   } else {
826     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828   }
829   ierr = PetscFree(column_indices);CHKERRQ(ierr);
830 
831   /* load up the local column values */
832   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
833   cnt  = 0;
834   for (i=0; i<mat->rmap.n; i++) {
835     for (j=B->i[i]; j<B->i[i+1]; j++) {
836       if ( garray[B->j[j]] > cstart) break;
837       column_values[cnt++] = B->a[j];
838     }
839     for (k=A->i[i]; k<A->i[i+1]; k++) {
840       column_values[cnt++] = A->a[k];
841     }
842     for (; j<B->i[i+1]; j++) {
843       column_values[cnt++] = B->a[j];
844     }
845   }
846   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
847 
848   /* store the column values to the file */
849   if (!rank) {
850     MPI_Status status;
851     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
852     for (i=1; i<size; i++) {
853       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
855       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
856       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
857     }
858   } else {
859     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
860     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(column_values);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
868 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
869 {
870   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
871   PetscErrorCode    ierr;
872   PetscMPIInt       rank = aij->rank,size = aij->size;
873   PetscTruth        isdraw,iascii,isbinary;
874   PetscViewer       sviewer;
875   PetscViewerFormat format;
876 
877   PetscFunctionBegin;
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
881   if (iascii) {
882     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
883     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
884       MatInfo    info;
885       PetscTruth inodes;
886 
887       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
888       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
890       if (!inodes) {
891         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
892 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
893       } else {
894         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
895 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
896       }
897       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
898       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
899       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
900       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
901       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
902       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
903       PetscFunctionReturn(0);
904     } else if (format == PETSC_VIEWER_ASCII_INFO) {
905       PetscInt   inodecount,inodelimit,*inodes;
906       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
907       if (inodes) {
908         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
909       } else {
910         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
911       }
912       PetscFunctionReturn(0);
913     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
914       PetscFunctionReturn(0);
915     }
916   } else if (isbinary) {
917     if (size == 1) {
918       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
919       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
920     } else {
921       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
922     }
923     PetscFunctionReturn(0);
924   } else if (isdraw) {
925     PetscDraw  draw;
926     PetscTruth isnull;
927     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
928     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
929   }
930 
931   if (size == 1) {
932     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
933     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
934   } else {
935     /* assemble the entire matrix onto first processor. */
936     Mat         A;
937     Mat_SeqAIJ  *Aloc;
938     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
939     PetscScalar *a;
940 
941     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
942     if (!rank) {
943       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
944     } else {
945       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
946     }
947     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
948     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
949     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
950     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
951 
952     /* copy over the A part */
953     Aloc = (Mat_SeqAIJ*)aij->A->data;
954     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
955     row = mat->rmap.rstart;
956     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
957     for (i=0; i<m; i++) {
958       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
959       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
960     }
961     aj = Aloc->j;
962     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
963 
964     /* copy over the B part */
965     Aloc = (Mat_SeqAIJ*)aij->B->data;
966     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
967     row  = mat->rmap.rstart;
968     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
969     ct   = cols;
970     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
971     for (i=0; i<m; i++) {
972       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
973       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
974     }
975     ierr = PetscFree(ct);CHKERRQ(ierr);
976     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     /*
979        Everyone has to call to draw the matrix since the graphics waits are
980        synchronized across all processors that share the PetscDraw object
981     */
982     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
983     if (!rank) {
984       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
985       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
986     }
987     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
988     ierr = MatDestroy(A);CHKERRQ(ierr);
989   }
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatView_MPIAIJ"
995 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
996 {
997   PetscErrorCode ierr;
998   PetscTruth     iascii,isdraw,issocket,isbinary;
999 
1000   PetscFunctionBegin;
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1005   if (iascii || isdraw || isbinary || issocket) {
1006     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1007   } else {
1008     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1009   }
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatRelax_MPIAIJ"
1017 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1018 {
1019   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1020   PetscErrorCode ierr;
1021   Vec            bb1;
1022 
1023   PetscFunctionBegin;
1024   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1025 
1026   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1027 
1028   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1029     if (flag & SOR_ZERO_INITIAL_GUESS) {
1030       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1031       its--;
1032     }
1033 
1034     while (its--) {
1035       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1036       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1037 
1038       /* update rhs: bb1 = bb - B*x */
1039       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1040       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1041 
1042       /* local sweep */
1043       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1044       CHKERRQ(ierr);
1045     }
1046   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1047     if (flag & SOR_ZERO_INITIAL_GUESS) {
1048       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1049       its--;
1050     }
1051     while (its--) {
1052       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1054 
1055       /* update rhs: bb1 = bb - B*x */
1056       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1057       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1058 
1059       /* local sweep */
1060       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1061       CHKERRQ(ierr);
1062     }
1063   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1064     if (flag & SOR_ZERO_INITIAL_GUESS) {
1065       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1066       its--;
1067     }
1068     while (its--) {
1069       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1071 
1072       /* update rhs: bb1 = bb - B*x */
1073       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1074       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1075 
1076       /* local sweep */
1077       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1078       CHKERRQ(ierr);
1079     }
1080   } else {
1081     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1082   }
1083 
1084   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatPermute_MPIAIJ"
1090 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1091 {
1092   MPI_Comm       comm,pcomm;
1093   PetscInt       first,local_size,nrows,*rows;
1094   int            ntids;
1095   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1100   /* make a collective version of 'rowp' */
1101   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1102   if (pcomm==comm) {
1103     crowp = rowp;
1104   } else {
1105     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1106     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1107     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1108     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1109   }
1110   /* collect the global row permutation and invert it */
1111   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1112   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1113   if (pcomm!=comm) {
1114     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1115   }
1116   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1117   /* get the local target indices */
1118   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1120   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1121   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1122   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1123   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1124   /* the column permutation is so much easier;
1125      make a local version of 'colp' and invert it */
1126   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1127   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1128   if (ntids==1) {
1129     lcolp = colp;
1130   } else {
1131     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1132     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1133     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1134   }
1135   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1136   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1137   if (ntids>1) {
1138     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1139     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1140   }
1141   /* now we just get the submatrix */
1142   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1143   /* clean up */
1144   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1145   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1151 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1152 {
1153   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1154   Mat            A = mat->A,B = mat->B;
1155   PetscErrorCode ierr;
1156   PetscReal      isend[5],irecv[5];
1157 
1158   PetscFunctionBegin;
1159   info->block_size     = 1.0;
1160   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1161   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1162   isend[3] = info->memory;  isend[4] = info->mallocs;
1163   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1164   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1165   isend[3] += info->memory;  isend[4] += info->mallocs;
1166   if (flag == MAT_LOCAL) {
1167     info->nz_used      = isend[0];
1168     info->nz_allocated = isend[1];
1169     info->nz_unneeded  = isend[2];
1170     info->memory       = isend[3];
1171     info->mallocs      = isend[4];
1172   } else if (flag == MAT_GLOBAL_MAX) {
1173     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1174     info->nz_used      = irecv[0];
1175     info->nz_allocated = irecv[1];
1176     info->nz_unneeded  = irecv[2];
1177     info->memory       = irecv[3];
1178     info->mallocs      = irecv[4];
1179   } else if (flag == MAT_GLOBAL_SUM) {
1180     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1181     info->nz_used      = irecv[0];
1182     info->nz_allocated = irecv[1];
1183     info->nz_unneeded  = irecv[2];
1184     info->memory       = irecv[3];
1185     info->mallocs      = irecv[4];
1186   }
1187   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1188   info->fill_ratio_needed = 0;
1189   info->factor_mallocs    = 0;
1190   info->rows_global       = (double)matin->rmap.N;
1191   info->columns_global    = (double)matin->cmap.N;
1192   info->rows_local        = (double)matin->rmap.n;
1193   info->columns_local     = (double)matin->cmap.N;
1194 
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatSetOption_MPIAIJ"
1200 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1201 {
1202   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   switch (op) {
1207   case MAT_NO_NEW_NONZERO_LOCATIONS:
1208   case MAT_YES_NEW_NONZERO_LOCATIONS:
1209   case MAT_COLUMNS_UNSORTED:
1210   case MAT_COLUMNS_SORTED:
1211   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1212   case MAT_KEEP_ZEROED_ROWS:
1213   case MAT_NEW_NONZERO_LOCATION_ERR:
1214   case MAT_USE_INODES:
1215   case MAT_DO_NOT_USE_INODES:
1216   case MAT_IGNORE_ZERO_ENTRIES:
1217     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1218     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1219     break;
1220   case MAT_ROW_ORIENTED:
1221     a->roworiented = PETSC_TRUE;
1222     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1223     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1224     break;
1225   case MAT_ROWS_SORTED:
1226   case MAT_ROWS_UNSORTED:
1227   case MAT_YES_NEW_DIAGONALS:
1228     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1229     break;
1230   case MAT_COLUMN_ORIENTED:
1231     a->roworiented = PETSC_FALSE;
1232     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1233     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1234     break;
1235   case MAT_IGNORE_OFF_PROC_ENTRIES:
1236     a->donotstash = PETSC_TRUE;
1237     break;
1238   case MAT_NO_NEW_DIAGONALS:
1239     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1240   case MAT_SYMMETRIC:
1241     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1242     break;
1243   case MAT_STRUCTURALLY_SYMMETRIC:
1244   case MAT_HERMITIAN:
1245   case MAT_SYMMETRY_ETERNAL:
1246     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1247     break;
1248   case MAT_NOT_SYMMETRIC:
1249   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1250   case MAT_NOT_HERMITIAN:
1251   case MAT_NOT_SYMMETRY_ETERNAL:
1252     break;
1253   default:
1254     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatGetRow_MPIAIJ"
1261 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1262 {
1263   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1264   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1265   PetscErrorCode ierr;
1266   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1267   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1268   PetscInt       *cmap,*idx_p;
1269 
1270   PetscFunctionBegin;
1271   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1272   mat->getrowactive = PETSC_TRUE;
1273 
1274   if (!mat->rowvalues && (idx || v)) {
1275     /*
1276         allocate enough space to hold information from the longest row.
1277     */
1278     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1279     PetscInt     max = 1,tmp;
1280     for (i=0; i<matin->rmap.n; i++) {
1281       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1282       if (max < tmp) { max = tmp; }
1283     }
1284     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1285     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1286   }
1287 
1288   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1289   lrow = row - rstart;
1290 
1291   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1292   if (!v)   {pvA = 0; pvB = 0;}
1293   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1294   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1295   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1296   nztot = nzA + nzB;
1297 
1298   cmap  = mat->garray;
1299   if (v  || idx) {
1300     if (nztot) {
1301       /* Sort by increasing column numbers, assuming A and B already sorted */
1302       PetscInt imark = -1;
1303       if (v) {
1304         *v = v_p = mat->rowvalues;
1305         for (i=0; i<nzB; i++) {
1306           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1307           else break;
1308         }
1309         imark = i;
1310         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1311         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1312       }
1313       if (idx) {
1314         *idx = idx_p = mat->rowindices;
1315         if (imark > -1) {
1316           for (i=0; i<imark; i++) {
1317             idx_p[i] = cmap[cworkB[i]];
1318           }
1319         } else {
1320           for (i=0; i<nzB; i++) {
1321             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1322             else break;
1323           }
1324           imark = i;
1325         }
1326         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1327         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1328       }
1329     } else {
1330       if (idx) *idx = 0;
1331       if (v)   *v   = 0;
1332     }
1333   }
1334   *nz = nztot;
1335   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1336   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1342 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1343 {
1344   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1345 
1346   PetscFunctionBegin;
1347   if (!aij->getrowactive) {
1348     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1349   }
1350   aij->getrowactive = PETSC_FALSE;
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatNorm_MPIAIJ"
1356 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1357 {
1358   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1359   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1360   PetscErrorCode ierr;
1361   PetscInt       i,j,cstart = mat->cmap.rstart;
1362   PetscReal      sum = 0.0;
1363   PetscScalar    *v;
1364 
1365   PetscFunctionBegin;
1366   if (aij->size == 1) {
1367     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1368   } else {
1369     if (type == NORM_FROBENIUS) {
1370       v = amat->a;
1371       for (i=0; i<amat->nz; i++) {
1372 #if defined(PETSC_USE_COMPLEX)
1373         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1374 #else
1375         sum += (*v)*(*v); v++;
1376 #endif
1377       }
1378       v = bmat->a;
1379       for (i=0; i<bmat->nz; i++) {
1380 #if defined(PETSC_USE_COMPLEX)
1381         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1382 #else
1383         sum += (*v)*(*v); v++;
1384 #endif
1385       }
1386       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1387       *norm = sqrt(*norm);
1388     } else if (type == NORM_1) { /* max column norm */
1389       PetscReal *tmp,*tmp2;
1390       PetscInt    *jj,*garray = aij->garray;
1391       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1392       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1393       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1394       *norm = 0.0;
1395       v = amat->a; jj = amat->j;
1396       for (j=0; j<amat->nz; j++) {
1397         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1398       }
1399       v = bmat->a; jj = bmat->j;
1400       for (j=0; j<bmat->nz; j++) {
1401         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1402       }
1403       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1404       for (j=0; j<mat->cmap.N; j++) {
1405         if (tmp2[j] > *norm) *norm = tmp2[j];
1406       }
1407       ierr = PetscFree(tmp);CHKERRQ(ierr);
1408       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1409     } else if (type == NORM_INFINITY) { /* max row norm */
1410       PetscReal ntemp = 0.0;
1411       for (j=0; j<aij->A->rmap.n; j++) {
1412         v = amat->a + amat->i[j];
1413         sum = 0.0;
1414         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1415           sum += PetscAbsScalar(*v); v++;
1416         }
1417         v = bmat->a + bmat->i[j];
1418         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1419           sum += PetscAbsScalar(*v); v++;
1420         }
1421         if (sum > ntemp) ntemp = sum;
1422       }
1423       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1424     } else {
1425       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1426     }
1427   }
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatTranspose_MPIAIJ"
1433 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1434 {
1435   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1436   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1437   PetscErrorCode ierr;
1438   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1439   Mat            B;
1440   PetscScalar    *array;
1441 
1442   PetscFunctionBegin;
1443   if (!matout && M != N) {
1444     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1445   }
1446 
1447   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1448   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1449   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1450   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1451 
1452   /* copy over the A part */
1453   Aloc = (Mat_SeqAIJ*)a->A->data;
1454   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1455   row = A->rmap.rstart;
1456   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
1457   for (i=0; i<m; i++) {
1458     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1459     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1460   }
1461   aj = Aloc->j;
1462   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}
1463 
1464   /* copy over the B part */
1465   Aloc = (Mat_SeqAIJ*)a->B->data;
1466   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1467   row  = A->rmap.rstart;
1468   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1469   ct   = cols;
1470   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1471   for (i=0; i<m; i++) {
1472     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1473     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1474   }
1475   ierr = PetscFree(ct);CHKERRQ(ierr);
1476   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1477   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1478   if (matout) {
1479     *matout = B;
1480   } else {
1481     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1482   }
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1488 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1489 {
1490   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1491   Mat            a = aij->A,b = aij->B;
1492   PetscErrorCode ierr;
1493   PetscInt       s1,s2,s3;
1494 
1495   PetscFunctionBegin;
1496   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1497   if (rr) {
1498     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1499     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1500     /* Overlap communication with computation. */
1501     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1502   }
1503   if (ll) {
1504     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1505     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1506     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1507   }
1508   /* scale  the diagonal block */
1509   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1510 
1511   if (rr) {
1512     /* Do a scatter end and then right scale the off-diagonal block */
1513     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1514     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1515   }
1516 
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1523 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1524 {
1525   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1526   PetscErrorCode ierr;
1527 
1528   PetscFunctionBegin;
1529   if (!a->rank) {
1530     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1531   }
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 #undef __FUNCT__
1536 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1537 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1538 {
1539   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1544   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1549 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1550 {
1551   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1552   PetscErrorCode ierr;
1553 
1554   PetscFunctionBegin;
1555   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatEqual_MPIAIJ"
1561 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1562 {
1563   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1564   Mat            a,b,c,d;
1565   PetscTruth     flg;
1566   PetscErrorCode ierr;
1567 
1568   PetscFunctionBegin;
1569   a = matA->A; b = matA->B;
1570   c = matB->A; d = matB->B;
1571 
1572   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1573   if (flg) {
1574     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1575   }
1576   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatCopy_MPIAIJ"
1582 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1583 {
1584   PetscErrorCode ierr;
1585   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1586   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1587 
1588   PetscFunctionBegin;
1589   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1590   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1591     /* because of the column compression in the off-processor part of the matrix a->B,
1592        the number of columns in a->B and b->B may be different, hence we cannot call
1593        the MatCopy() directly on the two parts. If need be, we can provide a more
1594        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1595        then copying the submatrices */
1596     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1597   } else {
1598     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1599     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1600   }
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 #undef __FUNCT__
1605 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1606 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1607 {
1608   PetscErrorCode ierr;
1609 
1610   PetscFunctionBegin;
1611   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 #include "petscblaslapack.h"
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatAXPY_MPIAIJ"
1618 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1619 {
1620   PetscErrorCode ierr;
1621   PetscInt       i;
1622   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1623   PetscBLASInt   bnz,one=1;
1624   Mat_SeqAIJ     *x,*y;
1625 
1626   PetscFunctionBegin;
1627   if (str == SAME_NONZERO_PATTERN) {
1628     PetscScalar alpha = a;
1629     x = (Mat_SeqAIJ *)xx->A->data;
1630     y = (Mat_SeqAIJ *)yy->A->data;
1631     bnz = (PetscBLASInt)x->nz;
1632     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1633     x = (Mat_SeqAIJ *)xx->B->data;
1634     y = (Mat_SeqAIJ *)yy->B->data;
1635     bnz = (PetscBLASInt)x->nz;
1636     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1637   } else if (str == SUBSET_NONZERO_PATTERN) {
1638     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1639 
1640     x = (Mat_SeqAIJ *)xx->B->data;
1641     y = (Mat_SeqAIJ *)yy->B->data;
1642     if (y->xtoy && y->XtoY != xx->B) {
1643       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1644       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1645     }
1646     if (!y->xtoy) { /* get xtoy */
1647       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1648       y->XtoY = xx->B;
1649     }
1650     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1651   } else {
1652     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1653   }
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1658 
1659 #undef __FUNCT__
1660 #define __FUNCT__ "MatConjugate_MPIAIJ"
1661 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1662 {
1663 #if defined(PETSC_USE_COMPLEX)
1664   PetscErrorCode ierr;
1665   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1666 
1667   PetscFunctionBegin;
1668   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1669   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1670 #else
1671   PetscFunctionBegin;
1672 #endif
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatRealPart_MPIAIJ"
1678 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1679 {
1680   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1681   PetscErrorCode ierr;
1682 
1683   PetscFunctionBegin;
1684   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1685   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1691 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1692 {
1693   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1694   PetscErrorCode ierr;
1695 
1696   PetscFunctionBegin;
1697   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1698   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 /* -------------------------------------------------------------------*/
1703 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1704        MatGetRow_MPIAIJ,
1705        MatRestoreRow_MPIAIJ,
1706        MatMult_MPIAIJ,
1707 /* 4*/ MatMultAdd_MPIAIJ,
1708        MatMultTranspose_MPIAIJ,
1709        MatMultTransposeAdd_MPIAIJ,
1710        0,
1711        0,
1712        0,
1713 /*10*/ 0,
1714        0,
1715        0,
1716        MatRelax_MPIAIJ,
1717        MatTranspose_MPIAIJ,
1718 /*15*/ MatGetInfo_MPIAIJ,
1719        MatEqual_MPIAIJ,
1720        MatGetDiagonal_MPIAIJ,
1721        MatDiagonalScale_MPIAIJ,
1722        MatNorm_MPIAIJ,
1723 /*20*/ MatAssemblyBegin_MPIAIJ,
1724        MatAssemblyEnd_MPIAIJ,
1725        0,
1726        MatSetOption_MPIAIJ,
1727        MatZeroEntries_MPIAIJ,
1728 /*25*/ MatZeroRows_MPIAIJ,
1729        0,
1730        0,
1731        0,
1732        0,
1733 /*30*/ MatSetUpPreallocation_MPIAIJ,
1734        0,
1735        0,
1736        0,
1737        0,
1738 /*35*/ MatDuplicate_MPIAIJ,
1739        0,
1740        0,
1741        0,
1742        0,
1743 /*40*/ MatAXPY_MPIAIJ,
1744        MatGetSubMatrices_MPIAIJ,
1745        MatIncreaseOverlap_MPIAIJ,
1746        MatGetValues_MPIAIJ,
1747        MatCopy_MPIAIJ,
1748 /*45*/ MatPrintHelp_MPIAIJ,
1749        MatScale_MPIAIJ,
1750        0,
1751        0,
1752        0,
1753 /*50*/ MatSetBlockSize_MPIAIJ,
1754        0,
1755        0,
1756        0,
1757        0,
1758 /*55*/ MatFDColoringCreate_MPIAIJ,
1759        0,
1760        MatSetUnfactored_MPIAIJ,
1761        MatPermute_MPIAIJ,
1762        0,
1763 /*60*/ MatGetSubMatrix_MPIAIJ,
1764        MatDestroy_MPIAIJ,
1765        MatView_MPIAIJ,
1766        0,
1767        0,
1768 /*65*/ 0,
1769        0,
1770        0,
1771        0,
1772        0,
1773 /*70*/ 0,
1774        0,
1775        MatSetColoring_MPIAIJ,
1776 #if defined(PETSC_HAVE_ADIC)
1777        MatSetValuesAdic_MPIAIJ,
1778 #else
1779        0,
1780 #endif
1781        MatSetValuesAdifor_MPIAIJ,
1782 /*75*/ 0,
1783        0,
1784        0,
1785        0,
1786        0,
1787 /*80*/ 0,
1788        0,
1789        0,
1790        0,
1791 /*84*/ MatLoad_MPIAIJ,
1792        0,
1793        0,
1794        0,
1795        0,
1796        0,
1797 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1798        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1799        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1800        MatPtAP_Basic,
1801        MatPtAPSymbolic_MPIAIJ,
1802 /*95*/ MatPtAPNumeric_MPIAIJ,
1803        0,
1804        0,
1805        0,
1806        0,
1807 /*100*/0,
1808        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1809        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1810        MatConjugate_MPIAIJ,
1811        0,
1812 /*105*/MatSetValuesRow_MPIAIJ,
1813        MatRealPart_MPIAIJ,
1814        MatImaginaryPart_MPIAIJ};
1815 
1816 /* ----------------------------------------------------------------------------------------*/
1817 
1818 EXTERN_C_BEGIN
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1821 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1822 {
1823   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1824   PetscErrorCode ierr;
1825 
1826   PetscFunctionBegin;
1827   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1828   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1829   PetscFunctionReturn(0);
1830 }
1831 EXTERN_C_END
1832 
1833 EXTERN_C_BEGIN
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1836 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1837 {
1838   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1839   PetscErrorCode ierr;
1840 
1841   PetscFunctionBegin;
1842   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1843   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1844   PetscFunctionReturn(0);
1845 }
1846 EXTERN_C_END
1847 
1848 #include "petscpc.h"
1849 EXTERN_C_BEGIN
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1852 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1853 {
1854   Mat_MPIAIJ     *b;
1855   PetscErrorCode ierr;
1856   PetscInt       i;
1857 
1858   PetscFunctionBegin;
1859   B->preallocated = PETSC_TRUE;
1860   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1861   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1862   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1863   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1864 
1865   B->rmap.bs = B->cmap.bs = 1;
1866   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1867   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1868   if (d_nnz) {
1869     for (i=0; i<B->rmap.n; i++) {
1870       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1871     }
1872   }
1873   if (o_nnz) {
1874     for (i=0; i<B->rmap.n; i++) {
1875       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1876     }
1877   }
1878   b = (Mat_MPIAIJ*)B->data;
1879 
1880   /* Explicitly create 2 MATSEQAIJ matrices. */
1881   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1882   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1883   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1884   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1885   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1886   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1887   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1888   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1889 
1890   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1891   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1892 
1893   PetscFunctionReturn(0);
1894 }
1895 EXTERN_C_END
1896 
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1899 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1900 {
1901   Mat            mat;
1902   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1903   PetscErrorCode ierr;
1904 
1905   PetscFunctionBegin;
1906   *newmat       = 0;
1907   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1908   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
1909   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1910   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1911   a    = (Mat_MPIAIJ*)mat->data;
1912 
1913   mat->factor       = matin->factor;
1914   mat->rmap.bs      = matin->rmap.bs;
1915   mat->assembled    = PETSC_TRUE;
1916   mat->insertmode   = NOT_SET_VALUES;
1917   mat->preallocated = PETSC_TRUE;
1918 
1919   a->size           = oldmat->size;
1920   a->rank           = oldmat->rank;
1921   a->donotstash     = oldmat->donotstash;
1922   a->roworiented    = oldmat->roworiented;
1923   a->rowindices     = 0;
1924   a->rowvalues      = 0;
1925   a->getrowactive   = PETSC_FALSE;
1926 
1927   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
1928   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
1929 
1930   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1931   if (oldmat->colmap) {
1932 #if defined (PETSC_USE_CTABLE)
1933     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1934 #else
1935     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1936     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1937     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1938 #endif
1939   } else a->colmap = 0;
1940   if (oldmat->garray) {
1941     PetscInt len;
1942     len  = oldmat->B->cmap.n;
1943     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1944     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1945     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1946   } else a->garray = 0;
1947 
1948   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1949   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1950   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1951   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1952   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1953   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1954   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1955   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1956   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1957   *newmat = mat;
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #include "petscsys.h"
1962 
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatLoad_MPIAIJ"
1965 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1966 {
1967   Mat            A;
1968   PetscScalar    *vals,*svals;
1969   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1970   MPI_Status     status;
1971   PetscErrorCode ierr;
1972   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1973   PetscInt       i,nz,j,rstart,rend,mmax;
1974   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1975   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1976   PetscInt       cend,cstart,n,*rowners;
1977   int            fd;
1978 
1979   PetscFunctionBegin;
1980   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1981   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1982   if (!rank) {
1983     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1984     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1985     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1986   }
1987 
1988   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1989   M = header[1]; N = header[2];
1990   /* determine ownership of all rows */
1991   m    = M/size + ((M % size) > rank);
1992   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1993   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1994 
1995   /* First process needs enough room for process with most rows */
1996   if (!rank) {
1997     mmax       = rowners[1];
1998     for (i=2; i<size; i++) {
1999       mmax = PetscMax(mmax,rowners[i]);
2000     }
2001   } else mmax = m;
2002 
2003   rowners[0] = 0;
2004   for (i=2; i<=size; i++) {
2005     mmax       = PetscMax(mmax,rowners[i]);
2006     rowners[i] += rowners[i-1];
2007   }
2008   rstart = rowners[rank];
2009   rend   = rowners[rank+1];
2010 
2011   /* distribute row lengths to all processors */
2012   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2013   if (!rank) {
2014     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2015     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2016     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2017     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2018     for (j=0; j<m; j++) {
2019       procsnz[0] += ourlens[j];
2020     }
2021     for (i=1; i<size; i++) {
2022       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2023       /* calculate the number of nonzeros on each processor */
2024       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2025         procsnz[i] += rowlengths[j];
2026       }
2027       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2028     }
2029     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2030   } else {
2031     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2032   }
2033 
2034   if (!rank) {
2035     /* determine max buffer needed and allocate it */
2036     maxnz = 0;
2037     for (i=0; i<size; i++) {
2038       maxnz = PetscMax(maxnz,procsnz[i]);
2039     }
2040     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2041 
2042     /* read in my part of the matrix column indices  */
2043     nz   = procsnz[0];
2044     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2045     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2046 
2047     /* read in every one elses and ship off */
2048     for (i=1; i<size; i++) {
2049       nz   = procsnz[i];
2050       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2051       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2052     }
2053     ierr = PetscFree(cols);CHKERRQ(ierr);
2054   } else {
2055     /* determine buffer space needed for message */
2056     nz = 0;
2057     for (i=0; i<m; i++) {
2058       nz += ourlens[i];
2059     }
2060     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2061 
2062     /* receive message of column indices*/
2063     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2064     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2065     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2066   }
2067 
2068   /* determine column ownership if matrix is not square */
2069   if (N != M) {
2070     n      = N/size + ((N % size) > rank);
2071     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2072     cstart = cend - n;
2073   } else {
2074     cstart = rstart;
2075     cend   = rend;
2076     n      = cend - cstart;
2077   }
2078 
2079   /* loop over local rows, determining number of off diagonal entries */
2080   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2081   jj = 0;
2082   for (i=0; i<m; i++) {
2083     for (j=0; j<ourlens[i]; j++) {
2084       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2085       jj++;
2086     }
2087   }
2088 
2089   /* create our matrix */
2090   for (i=0; i<m; i++) {
2091     ourlens[i] -= offlens[i];
2092   }
2093   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2094   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2095   ierr = MatSetType(A,type);CHKERRQ(ierr);
2096   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2097 
2098   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2099   for (i=0; i<m; i++) {
2100     ourlens[i] += offlens[i];
2101   }
2102 
2103   if (!rank) {
2104     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2105 
2106     /* read in my part of the matrix numerical values  */
2107     nz   = procsnz[0];
2108     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2109 
2110     /* insert into matrix */
2111     jj      = rstart;
2112     smycols = mycols;
2113     svals   = vals;
2114     for (i=0; i<m; i++) {
2115       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2116       smycols += ourlens[i];
2117       svals   += ourlens[i];
2118       jj++;
2119     }
2120 
2121     /* read in other processors and ship out */
2122     for (i=1; i<size; i++) {
2123       nz   = procsnz[i];
2124       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2125       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2126     }
2127     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2128   } else {
2129     /* receive numeric values */
2130     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2131 
2132     /* receive message of values*/
2133     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2134     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2135     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2136 
2137     /* insert into matrix */
2138     jj      = rstart;
2139     smycols = mycols;
2140     svals   = vals;
2141     for (i=0; i<m; i++) {
2142       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2143       smycols += ourlens[i];
2144       svals   += ourlens[i];
2145       jj++;
2146     }
2147   }
2148   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2149   ierr = PetscFree(vals);CHKERRQ(ierr);
2150   ierr = PetscFree(mycols);CHKERRQ(ierr);
2151   ierr = PetscFree(rowners);CHKERRQ(ierr);
2152 
2153   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2154   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2155   *newmat = A;
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 #undef __FUNCT__
2160 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2161 /*
2162     Not great since it makes two copies of the submatrix, first an SeqAIJ
2163   in local and then by concatenating the local matrices the end result.
2164   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2165 */
2166 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2167 {
2168   PetscErrorCode ierr;
2169   PetscMPIInt    rank,size;
2170   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2171   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2172   Mat            *local,M,Mreuse;
2173   PetscScalar    *vwork,*aa;
2174   MPI_Comm       comm = mat->comm;
2175   Mat_SeqAIJ     *aij;
2176 
2177 
2178   PetscFunctionBegin;
2179   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2180   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2181 
2182   if (call ==  MAT_REUSE_MATRIX) {
2183     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2184     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2185     local = &Mreuse;
2186     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2187   } else {
2188     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2189     Mreuse = *local;
2190     ierr   = PetscFree(local);CHKERRQ(ierr);
2191   }
2192 
2193   /*
2194       m - number of local rows
2195       n - number of columns (same on all processors)
2196       rstart - first row in new global matrix generated
2197   */
2198   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2199   if (call == MAT_INITIAL_MATRIX) {
2200     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2201     ii  = aij->i;
2202     jj  = aij->j;
2203 
2204     /*
2205         Determine the number of non-zeros in the diagonal and off-diagonal
2206         portions of the matrix in order to do correct preallocation
2207     */
2208 
2209     /* first get start and end of "diagonal" columns */
2210     if (csize == PETSC_DECIDE) {
2211       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2212       if (mglobal == n) { /* square matrix */
2213 	nlocal = m;
2214       } else {
2215         nlocal = n/size + ((n % size) > rank);
2216       }
2217     } else {
2218       nlocal = csize;
2219     }
2220     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2221     rstart = rend - nlocal;
2222     if (rank == size - 1 && rend != n) {
2223       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2224     }
2225 
2226     /* next, compute all the lengths */
2227     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2228     olens = dlens + m;
2229     for (i=0; i<m; i++) {
2230       jend = ii[i+1] - ii[i];
2231       olen = 0;
2232       dlen = 0;
2233       for (j=0; j<jend; j++) {
2234         if (*jj < rstart || *jj >= rend) olen++;
2235         else dlen++;
2236         jj++;
2237       }
2238       olens[i] = olen;
2239       dlens[i] = dlen;
2240     }
2241     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2242     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2243     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2244     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2245     ierr = PetscFree(dlens);CHKERRQ(ierr);
2246   } else {
2247     PetscInt ml,nl;
2248 
2249     M = *newmat;
2250     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2251     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2252     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2253     /*
2254          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2255        rather than the slower MatSetValues().
2256     */
2257     M->was_assembled = PETSC_TRUE;
2258     M->assembled     = PETSC_FALSE;
2259   }
2260   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2261   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2262   ii  = aij->i;
2263   jj  = aij->j;
2264   aa  = aij->a;
2265   for (i=0; i<m; i++) {
2266     row   = rstart + i;
2267     nz    = ii[i+1] - ii[i];
2268     cwork = jj;     jj += nz;
2269     vwork = aa;     aa += nz;
2270     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2271   }
2272 
2273   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2274   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2275   *newmat = M;
2276 
2277   /* save submatrix used in processor for next request */
2278   if (call ==  MAT_INITIAL_MATRIX) {
2279     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2280     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2281   }
2282 
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 EXTERN_C_BEGIN
2287 #undef __FUNCT__
2288 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2289 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2290 {
2291   PetscInt       m,cstart, cend,j,nnz,i,d;
2292   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2293   const PetscInt *JJ;
2294   PetscScalar    *values;
2295   PetscErrorCode ierr;
2296 
2297   PetscFunctionBegin;
2298   if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]);
2299 
2300   B->rmap.bs = B->cmap.bs = 1;
2301   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2302   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2303   m      = B->rmap.n;
2304   cstart = B->cmap.rstart;
2305   cend   = B->cmap.rend;
2306   rstart = B->rmap.rstart;
2307 
2308   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2309   o_nnz = d_nnz + m;
2310 
2311   for (i=0; i<m; i++) {
2312     nnz     = I[i+1]- I[i];
2313     JJ      = J + I[i];
2314     nnz_max = PetscMax(nnz_max,nnz);
2315     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2316     for (j=0; j<nnz; j++) {
2317       if (*JJ >= cstart) break;
2318       JJ++;
2319     }
2320     d = 0;
2321     for (; j<nnz; j++) {
2322       if (*JJ++ >= cend) break;
2323       d++;
2324     }
2325     d_nnz[i] = d;
2326     o_nnz[i] = nnz - d;
2327   }
2328   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2329   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2330 
2331   if (v) values = (PetscScalar*)v;
2332   else {
2333     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2334     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2335   }
2336 
2337   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2338   for (i=0; i<m; i++) {
2339     ii   = i + rstart;
2340     nnz  = I[i+1]- I[i];
2341     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2342   }
2343   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2344   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2345   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2346 
2347   if (!v) {
2348     ierr = PetscFree(values);CHKERRQ(ierr);
2349   }
2350   PetscFunctionReturn(0);
2351 }
2352 EXTERN_C_END
2353 
2354 #undef __FUNCT__
2355 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2356 /*@
2357    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2358    (the default parallel PETSc format).
2359 
2360    Collective on MPI_Comm
2361 
2362    Input Parameters:
2363 +  B - the matrix
2364 .  i - the indices into j for the start of each local row (starts with zero)
2365 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2366 -  v - optional values in the matrix
2367 
2368    Level: developer
2369 
2370    Notes: this actually copies the values from i[], j[], and a[] to put them into PETSc's internal
2371      storage format. Thus changing the values in a[] after this call will not effect the matrix values.
2372 
2373 .keywords: matrix, aij, compressed row, sparse, parallel
2374 
2375 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2376           MatCreateSeqAIJWithArrays()
2377 @*/
2378 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2379 {
2380   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2381 
2382   PetscFunctionBegin;
2383   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2384   if (f) {
2385     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2386   }
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2392 /*@C
2393    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2394    (the default parallel PETSc format).  For good matrix assembly performance
2395    the user should preallocate the matrix storage by setting the parameters
2396    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2397    performance can be increased by more than a factor of 50.
2398 
2399    Collective on MPI_Comm
2400 
2401    Input Parameters:
2402 +  A - the matrix
2403 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2404            (same value is used for all local rows)
2405 .  d_nnz - array containing the number of nonzeros in the various rows of the
2406            DIAGONAL portion of the local submatrix (possibly different for each row)
2407            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2408            The size of this array is equal to the number of local rows, i.e 'm'.
2409            You must leave room for the diagonal entry even if it is zero.
2410 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2411            submatrix (same value is used for all local rows).
2412 -  o_nnz - array containing the number of nonzeros in the various rows of the
2413            OFF-DIAGONAL portion of the local submatrix (possibly different for
2414            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2415            structure. The size of this array is equal to the number
2416            of local rows, i.e 'm'.
2417 
2418    If the *_nnz parameter is given then the *_nz parameter is ignored
2419 
2420    The AIJ format (also called the Yale sparse matrix format or
2421    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2422    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2423 
2424    The parallel matrix is partitioned such that the first m0 rows belong to
2425    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2426    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2427 
2428    The DIAGONAL portion of the local submatrix of a processor can be defined
2429    as the submatrix which is obtained by extraction the part corresponding
2430    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2431    first row that belongs to the processor, and r2 is the last row belonging
2432    to the this processor. This is a square mxm matrix. The remaining portion
2433    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2434 
2435    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2436 
2437    Example usage:
2438 
2439    Consider the following 8x8 matrix with 34 non-zero values, that is
2440    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2441    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2442    as follows:
2443 
2444 .vb
2445             1  2  0  |  0  3  0  |  0  4
2446     Proc0   0  5  6  |  7  0  0  |  8  0
2447             9  0 10  | 11  0  0  | 12  0
2448     -------------------------------------
2449            13  0 14  | 15 16 17  |  0  0
2450     Proc1   0 18  0  | 19 20 21  |  0  0
2451             0  0  0  | 22 23  0  | 24  0
2452     -------------------------------------
2453     Proc2  25 26 27  |  0  0 28  | 29  0
2454            30  0  0  | 31 32 33  |  0 34
2455 .ve
2456 
2457    This can be represented as a collection of submatrices as:
2458 
2459 .vb
2460       A B C
2461       D E F
2462       G H I
2463 .ve
2464 
2465    Where the submatrices A,B,C are owned by proc0, D,E,F are
2466    owned by proc1, G,H,I are owned by proc2.
2467 
2468    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2469    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2470    The 'M','N' parameters are 8,8, and have the same values on all procs.
2471 
2472    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2473    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2474    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2475    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2476    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2477    matrix, ans [DF] as another SeqAIJ matrix.
2478 
2479    When d_nz, o_nz parameters are specified, d_nz storage elements are
2480    allocated for every row of the local diagonal submatrix, and o_nz
2481    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2482    One way to choose d_nz and o_nz is to use the max nonzerors per local
2483    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2484    In this case, the values of d_nz,o_nz are:
2485 .vb
2486      proc0 : dnz = 2, o_nz = 2
2487      proc1 : dnz = 3, o_nz = 2
2488      proc2 : dnz = 1, o_nz = 4
2489 .ve
2490    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2491    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2492    for proc3. i.e we are using 12+15+10=37 storage locations to store
2493    34 values.
2494 
2495    When d_nnz, o_nnz parameters are specified, the storage is specified
2496    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2497    In the above case the values for d_nnz,o_nnz are:
2498 .vb
2499      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2500      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2501      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2502 .ve
2503    Here the space allocated is sum of all the above values i.e 34, and
2504    hence pre-allocation is perfect.
2505 
2506    Level: intermediate
2507 
2508 .keywords: matrix, aij, compressed row, sparse, parallel
2509 
2510 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2511           MPIAIJ
2512 @*/
2513 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2514 {
2515   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2516 
2517   PetscFunctionBegin;
2518   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2519   if (f) {
2520     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2521   }
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 #undef __FUNCT__
2526 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
2527 /*@C
2528      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
2529          CSR format the local rows.
2530 
2531    Collective on MPI_Comm
2532 
2533    Input Parameters:
2534 +  comm - MPI communicator
2535 .  m - number of local rows (Cannot be PETSC_DECIDE)
2536 .  n - This value should be the same as the local size used in creating the
2537        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2538        calculated if N is given) For square matrices n is almost always m.
2539 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2540 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2541 .   i - row indices
2542 .   j - column indices
2543 -   a - matrix values
2544 
2545    Output Parameter:
2546 .   mat - the matrix
2547    Level: intermediate
2548 
2549    Notes:
2550        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2551      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2552      called this routine.
2553 
2554        The i and j indices are 0 based
2555 
2556 .keywords: matrix, aij, compressed row, sparse, parallel
2557 
2558 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2559           MPIAIJ, MatCreateMPIAIJ()
2560 @*/
2561 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2562 {
2563   PetscErrorCode ierr;
2564 
2565  PetscFunctionBegin;
2566   if (i[0]) {
2567     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2568   }
2569   if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2570   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2571   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
2572   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 #undef __FUNCT__
2577 #define __FUNCT__ "MatCreateMPIAIJ"
2578 /*@C
2579    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2580    (the default parallel PETSc format).  For good matrix assembly performance
2581    the user should preallocate the matrix storage by setting the parameters
2582    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2583    performance can be increased by more than a factor of 50.
2584 
2585    Collective on MPI_Comm
2586 
2587    Input Parameters:
2588 +  comm - MPI communicator
2589 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2590            This value should be the same as the local size used in creating the
2591            y vector for the matrix-vector product y = Ax.
2592 .  n - This value should be the same as the local size used in creating the
2593        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2594        calculated if N is given) For square matrices n is almost always m.
2595 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2596 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2597 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2598            (same value is used for all local rows)
2599 .  d_nnz - array containing the number of nonzeros in the various rows of the
2600            DIAGONAL portion of the local submatrix (possibly different for each row)
2601            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2602            The size of this array is equal to the number of local rows, i.e 'm'.
2603            You must leave room for the diagonal entry even if it is zero.
2604 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2605            submatrix (same value is used for all local rows).
2606 -  o_nnz - array containing the number of nonzeros in the various rows of the
2607            OFF-DIAGONAL portion of the local submatrix (possibly different for
2608            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2609            structure. The size of this array is equal to the number
2610            of local rows, i.e 'm'.
2611 
2612    Output Parameter:
2613 .  A - the matrix
2614 
2615    Notes:
2616    If the *_nnz parameter is given then the *_nz parameter is ignored
2617 
2618    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2619    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2620    storage requirements for this matrix.
2621 
2622    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2623    processor than it must be used on all processors that share the object for
2624    that argument.
2625 
2626    The user MUST specify either the local or global matrix dimensions
2627    (possibly both).
2628 
2629    The parallel matrix is partitioned such that the first m0 rows belong to
2630    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2631    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2632 
2633    The DIAGONAL portion of the local submatrix of a processor can be defined
2634    as the submatrix which is obtained by extraction the part corresponding
2635    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2636    first row that belongs to the processor, and r2 is the last row belonging
2637    to the this processor. This is a square mxm matrix. The remaining portion
2638    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2639 
2640    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2641 
2642    When calling this routine with a single process communicator, a matrix of
2643    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2644    type of communicator, use the construction mechanism:
2645      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2646 
2647    By default, this format uses inodes (identical nodes) when possible.
2648    We search for consecutive rows with the same nonzero structure, thereby
2649    reusing matrix information to achieve increased efficiency.
2650 
2651    Options Database Keys:
2652 +  -mat_no_inode  - Do not use inodes
2653 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2654 -  -mat_aij_oneindex - Internally use indexing starting at 1
2655         rather than 0.  Note that when calling MatSetValues(),
2656         the user still MUST index entries starting at 0!
2657 
2658 
2659    Example usage:
2660 
2661    Consider the following 8x8 matrix with 34 non-zero values, that is
2662    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2663    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2664    as follows:
2665 
2666 .vb
2667             1  2  0  |  0  3  0  |  0  4
2668     Proc0   0  5  6  |  7  0  0  |  8  0
2669             9  0 10  | 11  0  0  | 12  0
2670     -------------------------------------
2671            13  0 14  | 15 16 17  |  0  0
2672     Proc1   0 18  0  | 19 20 21  |  0  0
2673             0  0  0  | 22 23  0  | 24  0
2674     -------------------------------------
2675     Proc2  25 26 27  |  0  0 28  | 29  0
2676            30  0  0  | 31 32 33  |  0 34
2677 .ve
2678 
2679    This can be represented as a collection of submatrices as:
2680 
2681 .vb
2682       A B C
2683       D E F
2684       G H I
2685 .ve
2686 
2687    Where the submatrices A,B,C are owned by proc0, D,E,F are
2688    owned by proc1, G,H,I are owned by proc2.
2689 
2690    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2691    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2692    The 'M','N' parameters are 8,8, and have the same values on all procs.
2693 
2694    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2695    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2696    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2697    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2698    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2699    matrix, ans [DF] as another SeqAIJ matrix.
2700 
2701    When d_nz, o_nz parameters are specified, d_nz storage elements are
2702    allocated for every row of the local diagonal submatrix, and o_nz
2703    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2704    One way to choose d_nz and o_nz is to use the max nonzerors per local
2705    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2706    In this case, the values of d_nz,o_nz are:
2707 .vb
2708      proc0 : dnz = 2, o_nz = 2
2709      proc1 : dnz = 3, o_nz = 2
2710      proc2 : dnz = 1, o_nz = 4
2711 .ve
2712    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2713    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2714    for proc3. i.e we are using 12+15+10=37 storage locations to store
2715    34 values.
2716 
2717    When d_nnz, o_nnz parameters are specified, the storage is specified
2718    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2719    In the above case the values for d_nnz,o_nnz are:
2720 .vb
2721      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2722      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2723      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2724 .ve
2725    Here the space allocated is sum of all the above values i.e 34, and
2726    hence pre-allocation is perfect.
2727 
2728    Level: intermediate
2729 
2730 .keywords: matrix, aij, compressed row, sparse, parallel
2731 
2732 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2733           MPIAIJ, MatCreateMPIAIJWithArrays()
2734 @*/
2735 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2736 {
2737   PetscErrorCode ierr;
2738   PetscMPIInt    size;
2739 
2740   PetscFunctionBegin;
2741   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2742   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2743   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2744   if (size > 1) {
2745     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2746     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2747   } else {
2748     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2749     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2750   }
2751   PetscFunctionReturn(0);
2752 }
2753 
2754 #undef __FUNCT__
2755 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2756 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2757 {
2758   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2759 
2760   PetscFunctionBegin;
2761   *Ad     = a->A;
2762   *Ao     = a->B;
2763   *colmap = a->garray;
2764   PetscFunctionReturn(0);
2765 }
2766 
2767 #undef __FUNCT__
2768 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2769 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2770 {
2771   PetscErrorCode ierr;
2772   PetscInt       i;
2773   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2774 
2775   PetscFunctionBegin;
2776   if (coloring->ctype == IS_COLORING_LOCAL) {
2777     ISColoringValue *allcolors,*colors;
2778     ISColoring      ocoloring;
2779 
2780     /* set coloring for diagonal portion */
2781     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2782 
2783     /* set coloring for off-diagonal portion */
2784     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2785     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2786     for (i=0; i<a->B->cmap.n; i++) {
2787       colors[i] = allcolors[a->garray[i]];
2788     }
2789     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2790     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2791     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2792     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2793   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2794     ISColoringValue *colors;
2795     PetscInt        *larray;
2796     ISColoring      ocoloring;
2797 
2798     /* set coloring for diagonal portion */
2799     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2800     for (i=0; i<a->A->cmap.n; i++) {
2801       larray[i] = i + A->cmap.rstart;
2802     }
2803     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2804     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2805     for (i=0; i<a->A->cmap.n; i++) {
2806       colors[i] = coloring->colors[larray[i]];
2807     }
2808     ierr = PetscFree(larray);CHKERRQ(ierr);
2809     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2810     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2811     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2812 
2813     /* set coloring for off-diagonal portion */
2814     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2815     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2816     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2817     for (i=0; i<a->B->cmap.n; i++) {
2818       colors[i] = coloring->colors[larray[i]];
2819     }
2820     ierr = PetscFree(larray);CHKERRQ(ierr);
2821     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
2822     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2823     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2824   } else {
2825     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2826   }
2827 
2828   PetscFunctionReturn(0);
2829 }
2830 
2831 #if defined(PETSC_HAVE_ADIC)
2832 #undef __FUNCT__
2833 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2834 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2835 {
2836   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2837   PetscErrorCode ierr;
2838 
2839   PetscFunctionBegin;
2840   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2841   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2842   PetscFunctionReturn(0);
2843 }
2844 #endif
2845 
2846 #undef __FUNCT__
2847 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2848 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2849 {
2850   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2851   PetscErrorCode ierr;
2852 
2853   PetscFunctionBegin;
2854   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2855   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2856   PetscFunctionReturn(0);
2857 }
2858 
2859 #undef __FUNCT__
2860 #define __FUNCT__ "MatMerge"
2861 /*@C
2862       MatMerge - Creates a single large PETSc matrix by concatinating sequential
2863                  matrices from each processor
2864 
2865     Collective on MPI_Comm
2866 
2867    Input Parameters:
2868 +    comm - the communicators the parallel matrix will live on
2869 .    inmat - the input sequential matrices
2870 .    n - number of local columns (or PETSC_DECIDE)
2871 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2872 
2873    Output Parameter:
2874 .    outmat - the parallel matrix generated
2875 
2876     Level: advanced
2877 
2878    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2879 
2880 @*/
2881 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2882 {
2883   PetscErrorCode ierr;
2884   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2885   PetscInt       *indx;
2886   PetscScalar    *values;
2887 
2888   PetscFunctionBegin;
2889   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2890   if (scall == MAT_INITIAL_MATRIX){
2891     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2892     if (n == PETSC_DECIDE){
2893       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
2894     }
2895     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2896     rstart -= m;
2897 
2898     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2899     for (i=0;i<m;i++) {
2900       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2901       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2902       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2903     }
2904     /* This routine will ONLY return MPIAIJ type matrix */
2905     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2906     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2907     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2908     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2909     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2910 
2911   } else if (scall == MAT_REUSE_MATRIX){
2912     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2913   } else {
2914     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2915   }
2916 
2917   for (i=0;i<m;i++) {
2918     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2919     I    = i + rstart;
2920     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2921     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2922   }
2923   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2924   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2925   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2926 
2927   PetscFunctionReturn(0);
2928 }
2929 
2930 #undef __FUNCT__
2931 #define __FUNCT__ "MatFileSplit"
2932 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2933 {
2934   PetscErrorCode    ierr;
2935   PetscMPIInt       rank;
2936   PetscInt          m,N,i,rstart,nnz;
2937   size_t            len;
2938   const PetscInt    *indx;
2939   PetscViewer       out;
2940   char              *name;
2941   Mat               B;
2942   const PetscScalar *values;
2943 
2944   PetscFunctionBegin;
2945   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2946   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2947   /* Should this be the type of the diagonal block of A? */
2948   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2949   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2950   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2951   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2952   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2953   for (i=0;i<m;i++) {
2954     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2955     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2956     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2957   }
2958   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2959   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2960 
2961   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2962   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2963   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2964   sprintf(name,"%s.%d",outfile,rank);
2965   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
2966   ierr = PetscFree(name);
2967   ierr = MatView(B,out);CHKERRQ(ierr);
2968   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2969   ierr = MatDestroy(B);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2974 #undef __FUNCT__
2975 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2976 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2977 {
2978   PetscErrorCode       ierr;
2979   Mat_Merge_SeqsToMPI  *merge;
2980   PetscObjectContainer container;
2981 
2982   PetscFunctionBegin;
2983   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2984   if (container) {
2985     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2986     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2987     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2988     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2989     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2990     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2991     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2992     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2993     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
2994     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
2995     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
2996     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
2997 
2998     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2999     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
3000   }
3001   ierr = PetscFree(merge);CHKERRQ(ierr);
3002 
3003   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
3004   PetscFunctionReturn(0);
3005 }
3006 
3007 #include "src/mat/utils/freespace.h"
3008 #include "petscbt.h"
3009 static PetscEvent logkey_seqstompinum = 0;
3010 #undef __FUNCT__
3011 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
3012 /*@C
3013       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
3014                  matrices from each processor
3015 
3016     Collective on MPI_Comm
3017 
3018    Input Parameters:
3019 +    comm - the communicators the parallel matrix will live on
3020 .    seqmat - the input sequential matrices
3021 .    m - number of local rows (or PETSC_DECIDE)
3022 .    n - number of local columns (or PETSC_DECIDE)
3023 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3024 
3025    Output Parameter:
3026 .    mpimat - the parallel matrix generated
3027 
3028     Level: advanced
3029 
3030    Notes:
3031      The dimensions of the sequential matrix in each processor MUST be the same.
3032      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
3033      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
3034 @*/
3035 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
3036 {
3037   PetscErrorCode       ierr;
3038   MPI_Comm             comm=mpimat->comm;
3039   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3040   PetscMPIInt          size,rank,taga,*len_s;
3041   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
3042   PetscInt             proc,m;
3043   PetscInt             **buf_ri,**buf_rj;
3044   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
3045   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
3046   MPI_Request          *s_waits,*r_waits;
3047   MPI_Status           *status;
3048   MatScalar            *aa=a->a,**abuf_r,*ba_i;
3049   Mat_Merge_SeqsToMPI  *merge;
3050   PetscObjectContainer container;
3051 
3052   PetscFunctionBegin;
3053   if (!logkey_seqstompinum) {
3054     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3055   }
3056   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3057 
3058   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3059   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3060 
3061   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3062   if (container) {
3063     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3064   }
3065   bi     = merge->bi;
3066   bj     = merge->bj;
3067   buf_ri = merge->buf_ri;
3068   buf_rj = merge->buf_rj;
3069 
3070   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3071   owners = merge->rowmap.range;
3072   len_s  = merge->len_s;
3073 
3074   /* send and recv matrix values */
3075   /*-----------------------------*/
3076   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3077   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3078 
3079   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3080   for (proc=0,k=0; proc<size; proc++){
3081     if (!len_s[proc]) continue;
3082     i = owners[proc];
3083     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3084     k++;
3085   }
3086 
3087   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3088   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3089   ierr = PetscFree(status);CHKERRQ(ierr);
3090 
3091   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3092   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3093 
3094   /* insert mat values of mpimat */
3095   /*----------------------------*/
3096   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3097   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3098   nextrow = buf_ri_k + merge->nrecv;
3099   nextai  = nextrow + merge->nrecv;
3100 
3101   for (k=0; k<merge->nrecv; k++){
3102     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3103     nrows = *(buf_ri_k[k]);
3104     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3105     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3106   }
3107 
3108   /* set values of ba */
3109   m = merge->rowmap.n;
3110   for (i=0; i<m; i++) {
3111     arow = owners[rank] + i;
3112     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3113     bnzi = bi[i+1] - bi[i];
3114     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3115 
3116     /* add local non-zero vals of this proc's seqmat into ba */
3117     anzi = ai[arow+1] - ai[arow];
3118     aj   = a->j + ai[arow];
3119     aa   = a->a + ai[arow];
3120     nextaj = 0;
3121     for (j=0; nextaj<anzi; j++){
3122       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3123         ba_i[j] += aa[nextaj++];
3124       }
3125     }
3126 
3127     /* add received vals into ba */
3128     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3129       /* i-th row */
3130       if (i == *nextrow[k]) {
3131         anzi = *(nextai[k]+1) - *nextai[k];
3132         aj   = buf_rj[k] + *(nextai[k]);
3133         aa   = abuf_r[k] + *(nextai[k]);
3134         nextaj = 0;
3135         for (j=0; nextaj<anzi; j++){
3136           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3137             ba_i[j] += aa[nextaj++];
3138           }
3139         }
3140         nextrow[k]++; nextai[k]++;
3141       }
3142     }
3143     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3144   }
3145   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3146   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3147 
3148   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3149   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3150   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3151   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3152   PetscFunctionReturn(0);
3153 }
3154 
3155 static PetscEvent logkey_seqstompisym = 0;
3156 #undef __FUNCT__
3157 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3158 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3159 {
3160   PetscErrorCode       ierr;
3161   Mat                  B_mpi;
3162   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3163   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3164   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3165   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3166   PetscInt             len,proc,*dnz,*onz;
3167   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3168   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3169   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3170   MPI_Status           *status;
3171   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3172   PetscBT              lnkbt;
3173   Mat_Merge_SeqsToMPI  *merge;
3174   PetscObjectContainer container;
3175 
3176   PetscFunctionBegin;
3177   if (!logkey_seqstompisym) {
3178     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3179   }
3180   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3181 
3182   /* make sure it is a PETSc comm */
3183   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3184   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3185   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3186 
3187   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3188   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3189 
3190   /* determine row ownership */
3191   /*---------------------------------------------------------*/
3192   merge->rowmap.n = m;
3193   merge->rowmap.N = M;
3194   merge->rowmap.bs = 1;
3195   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3196   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3197   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3198 
3199   m      = merge->rowmap.n;
3200   M      = merge->rowmap.N;
3201   owners = merge->rowmap.range;
3202 
3203   /* determine the number of messages to send, their lengths */
3204   /*---------------------------------------------------------*/
3205   len_s  = merge->len_s;
3206 
3207   len = 0;  /* length of buf_si[] */
3208   merge->nsend = 0;
3209   for (proc=0; proc<size; proc++){
3210     len_si[proc] = 0;
3211     if (proc == rank){
3212       len_s[proc] = 0;
3213     } else {
3214       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3215       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3216     }
3217     if (len_s[proc]) {
3218       merge->nsend++;
3219       nrows = 0;
3220       for (i=owners[proc]; i<owners[proc+1]; i++){
3221         if (ai[i+1] > ai[i]) nrows++;
3222       }
3223       len_si[proc] = 2*(nrows+1);
3224       len += len_si[proc];
3225     }
3226   }
3227 
3228   /* determine the number and length of messages to receive for ij-structure */
3229   /*-------------------------------------------------------------------------*/
3230   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3231   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3232 
3233   /* post the Irecv of j-structure */
3234   /*-------------------------------*/
3235   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3236   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3237 
3238   /* post the Isend of j-structure */
3239   /*--------------------------------*/
3240   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3241   sj_waits = si_waits + merge->nsend;
3242 
3243   for (proc=0, k=0; proc<size; proc++){
3244     if (!len_s[proc]) continue;
3245     i = owners[proc];
3246     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3247     k++;
3248   }
3249 
3250   /* receives and sends of j-structure are complete */
3251   /*------------------------------------------------*/
3252   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3253   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3254 
3255   /* send and recv i-structure */
3256   /*---------------------------*/
3257   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3258   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3259 
3260   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3261   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3262   for (proc=0,k=0; proc<size; proc++){
3263     if (!len_s[proc]) continue;
3264     /* form outgoing message for i-structure:
3265          buf_si[0]:                 nrows to be sent
3266                [1:nrows]:           row index (global)
3267                [nrows+1:2*nrows+1]: i-structure index
3268     */
3269     /*-------------------------------------------*/
3270     nrows = len_si[proc]/2 - 1;
3271     buf_si_i    = buf_si + nrows+1;
3272     buf_si[0]   = nrows;
3273     buf_si_i[0] = 0;
3274     nrows = 0;
3275     for (i=owners[proc]; i<owners[proc+1]; i++){
3276       anzi = ai[i+1] - ai[i];
3277       if (anzi) {
3278         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3279         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3280         nrows++;
3281       }
3282     }
3283     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3284     k++;
3285     buf_si += len_si[proc];
3286   }
3287 
3288   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3289   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3290 
3291   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3292   for (i=0; i<merge->nrecv; i++){
3293     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3294   }
3295 
3296   ierr = PetscFree(len_si);CHKERRQ(ierr);
3297   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3298   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3299   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3300   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3301   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3302   ierr = PetscFree(status);CHKERRQ(ierr);
3303 
3304   /* compute a local seq matrix in each processor */
3305   /*----------------------------------------------*/
3306   /* allocate bi array and free space for accumulating nonzero column info */
3307   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3308   bi[0] = 0;
3309 
3310   /* create and initialize a linked list */
3311   nlnk = N+1;
3312   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3313 
3314   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3315   len = 0;
3316   len  = ai[owners[rank+1]] - ai[owners[rank]];
3317   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3318   current_space = free_space;
3319 
3320   /* determine symbolic info for each local row */
3321   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3322   nextrow = buf_ri_k + merge->nrecv;
3323   nextai  = nextrow + merge->nrecv;
3324   for (k=0; k<merge->nrecv; k++){
3325     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3326     nrows = *buf_ri_k[k];
3327     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3328     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3329   }
3330 
3331   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3332   len = 0;
3333   for (i=0;i<m;i++) {
3334     bnzi   = 0;
3335     /* add local non-zero cols of this proc's seqmat into lnk */
3336     arow   = owners[rank] + i;
3337     anzi   = ai[arow+1] - ai[arow];
3338     aj     = a->j + ai[arow];
3339     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3340     bnzi += nlnk;
3341     /* add received col data into lnk */
3342     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3343       if (i == *nextrow[k]) { /* i-th row */
3344         anzi = *(nextai[k]+1) - *nextai[k];
3345         aj   = buf_rj[k] + *nextai[k];
3346         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3347         bnzi += nlnk;
3348         nextrow[k]++; nextai[k]++;
3349       }
3350     }
3351     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3352 
3353     /* if free space is not available, make more free space */
3354     if (current_space->local_remaining<bnzi) {
3355       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3356       nspacedouble++;
3357     }
3358     /* copy data into free space, then initialize lnk */
3359     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3360     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3361 
3362     current_space->array           += bnzi;
3363     current_space->local_used      += bnzi;
3364     current_space->local_remaining -= bnzi;
3365 
3366     bi[i+1] = bi[i] + bnzi;
3367   }
3368 
3369   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3370 
3371   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3372   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3373   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3374 
3375   /* create symbolic parallel matrix B_mpi */
3376   /*---------------------------------------*/
3377   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3378   if (n==PETSC_DECIDE) {
3379     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3380   } else {
3381     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3382   }
3383   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3384   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3385   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3386 
3387   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3388   B_mpi->assembled     = PETSC_FALSE;
3389   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3390   merge->bi            = bi;
3391   merge->bj            = bj;
3392   merge->buf_ri        = buf_ri;
3393   merge->buf_rj        = buf_rj;
3394   merge->coi           = PETSC_NULL;
3395   merge->coj           = PETSC_NULL;
3396   merge->owners_co     = PETSC_NULL;
3397 
3398   /* attach the supporting struct to B_mpi for reuse */
3399   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3400   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3401   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3402   *mpimat = B_mpi;
3403 
3404   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3405   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3406   PetscFunctionReturn(0);
3407 }
3408 
3409 static PetscEvent logkey_seqstompi = 0;
3410 #undef __FUNCT__
3411 #define __FUNCT__ "MatMerge_SeqsToMPI"
3412 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3413 {
3414   PetscErrorCode   ierr;
3415 
3416   PetscFunctionBegin;
3417   if (!logkey_seqstompi) {
3418     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3419   }
3420   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3421   if (scall == MAT_INITIAL_MATRIX){
3422     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3423   }
3424   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3425   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3426   PetscFunctionReturn(0);
3427 }
3428 static PetscEvent logkey_getlocalmat = 0;
3429 #undef __FUNCT__
3430 #define __FUNCT__ "MatGetLocalMat"
3431 /*@C
3432      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3433 
3434     Not Collective
3435 
3436    Input Parameters:
3437 +    A - the matrix
3438 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3439 
3440    Output Parameter:
3441 .    A_loc - the local sequential matrix generated
3442 
3443     Level: developer
3444 
3445 @*/
3446 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3447 {
3448   PetscErrorCode  ierr;
3449   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3450   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3451   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3452   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3453   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3454   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3455 
3456   PetscFunctionBegin;
3457   if (!logkey_getlocalmat) {
3458     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3459   }
3460   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3461   if (scall == MAT_INITIAL_MATRIX){
3462     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3463     ci[0] = 0;
3464     for (i=0; i<am; i++){
3465       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3466     }
3467     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3468     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3469     k = 0;
3470     for (i=0; i<am; i++) {
3471       ncols_o = bi[i+1] - bi[i];
3472       ncols_d = ai[i+1] - ai[i];
3473       /* off-diagonal portion of A */
3474       for (jo=0; jo<ncols_o; jo++) {
3475         col = cmap[*bj];
3476         if (col >= cstart) break;
3477         cj[k]   = col; bj++;
3478         ca[k++] = *ba++;
3479       }
3480       /* diagonal portion of A */
3481       for (j=0; j<ncols_d; j++) {
3482         cj[k]   = cstart + *aj++;
3483         ca[k++] = *aa++;
3484       }
3485       /* off-diagonal portion of A */
3486       for (j=jo; j<ncols_o; j++) {
3487         cj[k]   = cmap[*bj++];
3488         ca[k++] = *ba++;
3489       }
3490     }
3491     /* put together the new matrix */
3492     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3493     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3494     /* Since these are PETSc arrays, change flags to free them as necessary. */
3495     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3496     mat->freedata = PETSC_TRUE;
3497     mat->nonew    = 0;
3498   } else if (scall == MAT_REUSE_MATRIX){
3499     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3500     ci = mat->i; cj = mat->j; ca = mat->a;
3501     for (i=0; i<am; i++) {
3502       /* off-diagonal portion of A */
3503       ncols_o = bi[i+1] - bi[i];
3504       for (jo=0; jo<ncols_o; jo++) {
3505         col = cmap[*bj];
3506         if (col >= cstart) break;
3507         *ca++ = *ba++; bj++;
3508       }
3509       /* diagonal portion of A */
3510       ncols_d = ai[i+1] - ai[i];
3511       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3512       /* off-diagonal portion of A */
3513       for (j=jo; j<ncols_o; j++) {
3514         *ca++ = *ba++; bj++;
3515       }
3516     }
3517   } else {
3518     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3519   }
3520 
3521   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3522   PetscFunctionReturn(0);
3523 }
3524 
3525 static PetscEvent logkey_getlocalmatcondensed = 0;
3526 #undef __FUNCT__
3527 #define __FUNCT__ "MatGetLocalMatCondensed"
3528 /*@C
3529      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
3530 
3531     Not Collective
3532 
3533    Input Parameters:
3534 +    A - the matrix
3535 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3536 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3537 
3538    Output Parameter:
3539 .    A_loc - the local sequential matrix generated
3540 
3541     Level: developer
3542 
3543 @*/
3544 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3545 {
3546   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3547   PetscErrorCode    ierr;
3548   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3549   IS                isrowa,iscola;
3550   Mat               *aloc;
3551 
3552   PetscFunctionBegin;
3553   if (!logkey_getlocalmatcondensed) {
3554     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3555   }
3556   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3557   if (!row){
3558     start = A->rmap.rstart; end = A->rmap.rend;
3559     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3560   } else {
3561     isrowa = *row;
3562   }
3563   if (!col){
3564     start = A->cmap.rstart;
3565     cmap  = a->garray;
3566     nzA   = a->A->cmap.n;
3567     nzB   = a->B->cmap.n;
3568     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3569     ncols = 0;
3570     for (i=0; i<nzB; i++) {
3571       if (cmap[i] < start) idx[ncols++] = cmap[i];
3572       else break;
3573     }
3574     imark = i;
3575     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3576     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3577     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3578     ierr = PetscFree(idx);CHKERRQ(ierr);
3579   } else {
3580     iscola = *col;
3581   }
3582   if (scall != MAT_INITIAL_MATRIX){
3583     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3584     aloc[0] = *A_loc;
3585   }
3586   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3587   *A_loc = aloc[0];
3588   ierr = PetscFree(aloc);CHKERRQ(ierr);
3589   if (!row){
3590     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3591   }
3592   if (!col){
3593     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3594   }
3595   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3596   PetscFunctionReturn(0);
3597 }
3598 
3599 static PetscEvent logkey_GetBrowsOfAcols = 0;
3600 #undef __FUNCT__
3601 #define __FUNCT__ "MatGetBrowsOfAcols"
3602 /*@C
3603     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
3604 
3605     Collective on Mat
3606 
3607    Input Parameters:
3608 +    A,B - the matrices in mpiaij format
3609 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3610 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3611 
3612    Output Parameter:
3613 +    rowb, colb - index sets of rows and columns of B to extract
3614 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
3615 -    B_seq - the sequential matrix generated
3616 
3617     Level: developer
3618 
3619 @*/
3620 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3621 {
3622   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3623   PetscErrorCode    ierr;
3624   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3625   IS                isrowb,iscolb;
3626   Mat               *bseq;
3627 
3628   PetscFunctionBegin;
3629   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3630     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3631   }
3632   if (!logkey_GetBrowsOfAcols) {
3633     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3634   }
3635   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3636 
3637   if (scall == MAT_INITIAL_MATRIX){
3638     start = A->cmap.rstart;
3639     cmap  = a->garray;
3640     nzA   = a->A->cmap.n;
3641     nzB   = a->B->cmap.n;
3642     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3643     ncols = 0;
3644     for (i=0; i<nzB; i++) {  /* row < local row index */
3645       if (cmap[i] < start) idx[ncols++] = cmap[i];
3646       else break;
3647     }
3648     imark = i;
3649     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3650     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3651     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3652     ierr = PetscFree(idx);CHKERRQ(ierr);
3653     *brstart = imark;
3654     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
3655   } else {
3656     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3657     isrowb = *rowb; iscolb = *colb;
3658     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3659     bseq[0] = *B_seq;
3660   }
3661   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3662   *B_seq = bseq[0];
3663   ierr = PetscFree(bseq);CHKERRQ(ierr);
3664   if (!rowb){
3665     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3666   } else {
3667     *rowb = isrowb;
3668   }
3669   if (!colb){
3670     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3671   } else {
3672     *colb = iscolb;
3673   }
3674   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3675   PetscFunctionReturn(0);
3676 }
3677 
3678 static PetscEvent logkey_GetBrowsOfAocols = 0;
3679 #undef __FUNCT__
3680 #define __FUNCT__ "MatGetBrowsOfAoCols"
3681 /*@C
3682     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
3683     of the OFF-DIAGONAL portion of local A
3684 
3685     Collective on Mat
3686 
3687    Input Parameters:
3688 +    A,B - the matrices in mpiaij format
3689 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3690 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3691 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3692 
3693    Output Parameter:
3694 +    B_oth - the sequential matrix generated
3695 
3696     Level: developer
3697 
3698 @*/
3699 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3700 {
3701   VecScatter_MPI_General *gen_to,*gen_from;
3702   PetscErrorCode         ierr;
3703   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
3704   Mat_SeqAIJ             *b_oth;
3705   VecScatter             ctx=a->Mvctx;
3706   MPI_Comm               comm=ctx->comm;
3707   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3708   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
3709   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3710   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3711   MPI_Request            *rwaits,*swaits;
3712   MPI_Status             *sstatus,rstatus;
3713   PetscInt               *cols;
3714   PetscScalar            *vals;
3715   PetscMPIInt            j;
3716 
3717   PetscFunctionBegin;
3718   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3719     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3720   }
3721   if (!logkey_GetBrowsOfAocols) {
3722     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3723   }
3724   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3725   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3726 
3727   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3728   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3729   rvalues  = gen_from->values; /* holds the length of sending row */
3730   svalues  = gen_to->values;   /* holds the length of receiving row */
3731   nrecvs   = gen_from->n;
3732   nsends   = gen_to->n;
3733 
3734   ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
3735   srow     = gen_to->indices;   /* local row index to be sent */
3736   rstarts  = gen_from->starts;
3737   sstarts  = gen_to->starts;
3738   rprocs   = gen_from->procs;
3739   sprocs   = gen_to->procs;
3740   sstatus  = gen_to->sstatus;
3741 
3742   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3743   if (scall == MAT_INITIAL_MATRIX){
3744     /* i-array */
3745     /*---------*/
3746     /*  post receives */
3747     for (i=0; i<nrecvs; i++){
3748       rowlen = (PetscInt*)rvalues + rstarts[i];
3749       nrows = rstarts[i+1]-rstarts[i];
3750       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3751     }
3752 
3753     /* pack the outgoing message */
3754     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3755     rstartsj = sstartsj + nsends +1;
3756     sstartsj[0] = 0;  rstartsj[0] = 0;
3757     len = 0; /* total length of j or a array to be sent */
3758     k = 0;
3759     for (i=0; i<nsends; i++){
3760       rowlen = (PetscInt*)svalues + sstarts[i];
3761       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3762       for (j=0; j<nrows; j++) {
3763         row = srow[k] + B->rmap.range[rank]; /* global row idx */
3764         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3765         len += rowlen[j];
3766         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3767         k++;
3768       }
3769       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3770        sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3771     }
3772     /* recvs and sends of i-array are completed */
3773     i = nrecvs;
3774     while (i--) {
3775       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3776     }
3777     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3778     /* allocate buffers for sending j and a arrays */
3779     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3780     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3781 
3782     /* create i-array of B_oth */
3783     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3784     b_othi[0] = 0;
3785     len = 0; /* total length of j or a array to be received */
3786     k = 0;
3787     for (i=0; i<nrecvs; i++){
3788       rowlen = (PetscInt*)rvalues + rstarts[i];
3789       nrows = rstarts[i+1]-rstarts[i];
3790       for (j=0; j<nrows; j++) {
3791         b_othi[k+1] = b_othi[k] + rowlen[j];
3792         len += rowlen[j]; k++;
3793       }
3794       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3795     }
3796 
3797     /* allocate space for j and a arrrays of B_oth */
3798     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3799     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3800 
3801     /* j-array */
3802     /*---------*/
3803     /*  post receives of j-array */
3804     for (i=0; i<nrecvs; i++){
3805       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3806       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3807     }
3808     k = 0;
3809     for (i=0; i<nsends; i++){
3810       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3811       bufJ = bufj+sstartsj[i];
3812       for (j=0; j<nrows; j++) {
3813         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3814         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3815         for (l=0; l<ncols; l++){
3816           *bufJ++ = cols[l];
3817         }
3818         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3819       }
3820       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3821     }
3822 
3823     /* recvs and sends of j-array are completed */
3824     i = nrecvs;
3825     while (i--) {
3826       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3827     }
3828     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3829   } else if (scall == MAT_REUSE_MATRIX){
3830     sstartsj = *startsj;
3831     rstartsj = sstartsj + nsends +1;
3832     bufa     = *bufa_ptr;
3833     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3834     b_otha   = b_oth->a;
3835   } else {
3836     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
3837   }
3838 
3839   /* a-array */
3840   /*---------*/
3841   /*  post receives of a-array */
3842   for (i=0; i<nrecvs; i++){
3843     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3844     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3845   }
3846   k = 0;
3847   for (i=0; i<nsends; i++){
3848     nrows = sstarts[i+1]-sstarts[i];
3849     bufA = bufa+sstartsj[i];
3850     for (j=0; j<nrows; j++) {
3851       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3852       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3853       for (l=0; l<ncols; l++){
3854         *bufA++ = vals[l];
3855       }
3856       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3857 
3858     }
3859     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3860   }
3861   /* recvs and sends of a-array are completed */
3862   i = nrecvs;
3863   while (i--) {
3864     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3865   }
3866   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3867   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
3868 
3869   if (scall == MAT_INITIAL_MATRIX){
3870     /* put together the new matrix */
3871     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3872 
3873     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3874     /* Since these are PETSc arrays, change flags to free them as necessary. */
3875     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3876     b_oth->freedata = PETSC_TRUE;
3877     b_oth->nonew    = 0;
3878 
3879     ierr = PetscFree(bufj);CHKERRQ(ierr);
3880     if (!startsj || !bufa_ptr){
3881       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3882       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3883     } else {
3884       *startsj  = sstartsj;
3885       *bufa_ptr = bufa;
3886     }
3887   }
3888   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3889 
3890   PetscFunctionReturn(0);
3891 }
3892 
3893 #undef __FUNCT__
3894 #define __FUNCT__ "MatGetCommunicationStructs"
3895 /*@C
3896   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
3897 
3898   Not Collective
3899 
3900   Input Parameters:
3901 . A - The matrix in mpiaij format
3902 
3903   Output Parameter:
3904 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
3905 . colmap - A map from global column index to local index into lvec
3906 - multScatter - A scatter from the argument of a matrix-vector product to lvec
3907 
3908   Level: developer
3909 
3910 @*/
3911 #if defined (PETSC_USE_CTABLE)
3912 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
3913 #else
3914 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
3915 #endif
3916 {
3917   Mat_MPIAIJ *a;
3918 
3919   PetscFunctionBegin;
3920   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
3921   PetscValidPointer(lvec, 2)
3922   PetscValidPointer(colmap, 3)
3923   PetscValidPointer(multScatter, 4)
3924   a = (Mat_MPIAIJ *) A->data;
3925   if (lvec) *lvec = a->lvec;
3926   if (colmap) *colmap = a->colmap;
3927   if (multScatter) *multScatter = a->Mvctx;
3928   PetscFunctionReturn(0);
3929 }
3930 
3931 /*MC
3932    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3933 
3934    Options Database Keys:
3935 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3936 
3937   Level: beginner
3938 
3939 .seealso: MatCreateMPIAIJ
3940 M*/
3941 
3942 EXTERN_C_BEGIN
3943 #undef __FUNCT__
3944 #define __FUNCT__ "MatCreate_MPIAIJ"
3945 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3946 {
3947   Mat_MPIAIJ     *b;
3948   PetscErrorCode ierr;
3949   PetscMPIInt    size;
3950 
3951   PetscFunctionBegin;
3952   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3953 
3954   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3955   B->data         = (void*)b;
3956   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3957   B->factor       = 0;
3958   B->rmap.bs      = 1;
3959   B->assembled    = PETSC_FALSE;
3960   B->mapping      = 0;
3961 
3962   B->insertmode      = NOT_SET_VALUES;
3963   b->size            = size;
3964   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3965 
3966   /* build cache for off array entries formed */
3967   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3968   b->donotstash  = PETSC_FALSE;
3969   b->colmap      = 0;
3970   b->garray      = 0;
3971   b->roworiented = PETSC_TRUE;
3972 
3973   /* stuff used for matrix vector multiply */
3974   b->lvec      = PETSC_NULL;
3975   b->Mvctx     = PETSC_NULL;
3976 
3977   /* stuff for MatGetRow() */
3978   b->rowindices   = 0;
3979   b->rowvalues    = 0;
3980   b->getrowactive = PETSC_FALSE;
3981 
3982 
3983   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3984                                      "MatStoreValues_MPIAIJ",
3985                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3986   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3987                                      "MatRetrieveValues_MPIAIJ",
3988                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3989   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3990 				     "MatGetDiagonalBlock_MPIAIJ",
3991                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3992   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3993 				     "MatIsTranspose_MPIAIJ",
3994 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3995   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3996 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3997 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3998   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3999 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
4000 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
4001   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
4002 				     "MatDiagonalScaleLocal_MPIAIJ",
4003 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
4004   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMatMult_mpiaij_mpidense_C",
4005                                      "MatMatMult_MPIAIJ_MPIDense",
4006                                       MatMatMult_MPIAIJ_MPIDense);CHKERRQ(ierr);
4007   PetscFunctionReturn(0);
4008 }
4009 EXTERN_C_END
4010 
4011 /*
4012     Special version for direct calls from Fortran
4013 */
4014 #if defined(PETSC_HAVE_FORTRAN_CAPS)
4015 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
4016 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
4017 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
4018 #endif
4019 
4020 /* Change these macros so can be used in void function */
4021 #undef CHKERRQ
4022 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
4023 #undef SETERRQ2
4024 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
4025 #undef SETERRQ
4026 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
4027 
4028 EXTERN_C_BEGIN
4029 #undef __FUNCT__
4030 #define __FUNCT__ "matsetvaluesmpiaij_"
4031 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv)
4032 {
4033   Mat            mat = *mmat;
4034   PetscInt       m = *mm, n = *mn;
4035   InsertMode     addv = *maddv;
4036   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
4037   PetscScalar    value;
4038   PetscErrorCode ierr;
4039 
4040   MatPreallocated(mat);
4041   if (mat->insertmode == NOT_SET_VALUES) {
4042     mat->insertmode = addv;
4043   }
4044 #if defined(PETSC_USE_DEBUG)
4045   else if (mat->insertmode != addv) {
4046     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4047   }
4048 #endif
4049   {
4050   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
4051   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
4052   PetscTruth     roworiented = aij->roworiented;
4053 
4054   /* Some Variables required in the macro */
4055   Mat            A = aij->A;
4056   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4057   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4058   PetscScalar    *aa = a->a;
4059   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4060   Mat            B = aij->B;
4061   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4062   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4063   PetscScalar    *ba = b->a;
4064 
4065   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4066   PetscInt       nonew = a->nonew;
4067   PetscScalar    *ap1,*ap2;
4068 
4069   PetscFunctionBegin;
4070   for (i=0; i<m; i++) {
4071     if (im[i] < 0) continue;
4072 #if defined(PETSC_USE_DEBUG)
4073     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4074 #endif
4075     if (im[i] >= rstart && im[i] < rend) {
4076       row      = im[i] - rstart;
4077       lastcol1 = -1;
4078       rp1      = aj + ai[row];
4079       ap1      = aa + ai[row];
4080       rmax1    = aimax[row];
4081       nrow1    = ailen[row];
4082       low1     = 0;
4083       high1    = nrow1;
4084       lastcol2 = -1;
4085       rp2      = bj + bi[row];
4086       ap2      = ba + bi[row];
4087       rmax2    = bimax[row];
4088       nrow2    = bilen[row];
4089       low2     = 0;
4090       high2    = nrow2;
4091 
4092       for (j=0; j<n; j++) {
4093         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4094         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4095         if (in[j] >= cstart && in[j] < cend){
4096           col = in[j] - cstart;
4097           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4098         } else if (in[j] < 0) continue;
4099 #if defined(PETSC_USE_DEBUG)
4100         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4101 #endif
4102         else {
4103           if (mat->was_assembled) {
4104             if (!aij->colmap) {
4105               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4106             }
4107 #if defined (PETSC_USE_CTABLE)
4108             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4109 	    col--;
4110 #else
4111             col = aij->colmap[in[j]] - 1;
4112 #endif
4113             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4114               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4115               col =  in[j];
4116               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4117               B = aij->B;
4118               b = (Mat_SeqAIJ*)B->data;
4119               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4120               rp2      = bj + bi[row];
4121               ap2      = ba + bi[row];
4122               rmax2    = bimax[row];
4123               nrow2    = bilen[row];
4124               low2     = 0;
4125               high2    = nrow2;
4126               bm       = aij->B->rmap.n;
4127               ba = b->a;
4128             }
4129           } else col = in[j];
4130           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4131         }
4132       }
4133     } else {
4134       if (!aij->donotstash) {
4135         if (roworiented) {
4136           if (ignorezeroentries && v[i*n] == 0.0) continue;
4137           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4138         } else {
4139           if (ignorezeroentries && v[i] == 0.0) continue;
4140           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4141         }
4142       }
4143     }
4144   }}
4145   PetscFunctionReturnVoid();
4146 }
4147 EXTERN_C_END
4148