1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable at
10 a slightly higher hash-table cost; without it, it is not scalable (each processor
11 has an order-N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
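/*
  Illustrative sketch (not part of the original source): how the colmap built
  above is typically consulted to translate a global column number gcol into
  a local column index in the off-diagonal block B.  The two branches mirror
  the lookups performed later in MatSetValues_MPIAIJ() and
  MatGetValues_MPIAIJ(); a result of -1 means the column is absent from B.
  The helper name ColmapLookup is hypothetical.
*/
#if 0
static PetscErrorCode ColmapLookup(Mat_MPIAIJ *aij,PetscInt gcol,PetscInt *lcol)
{
  PetscErrorCode ierr;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr); /* stored 1-based */
  (*lcol)--;                                  /* back to 0-based; -1 when absent */
#else
  *lcol = aij->colmap[gcol] - 1;              /* a stored 0 means absent */
#endif
  return 0;
}
#endif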
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
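/*
  Illustrative sketch (not part of the original source): the search-then-insert
  pattern that both macros above implement, written as a plain function.  A
  short binary search narrows the window, a linear scan finds the exact slot,
  and on a miss the later entries are shifted up to make room.  Reallocation,
  the lastcol/low/high caching across successive calls, and the
  nonew/ignorezeroentries policies of the real macros are omitted here.
*/
#if 0
static void InsertSorted(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,
                         PetscInt col,PetscScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i,ii;
  while (high - low > 5) {          /* binary search down to a small window */
    t = (low + high)/2;
    if (cols[t] > col) high = t; else low = t;
  }
  for (i=low; i<high; i++) {        /* linear scan inside the window */
    if (cols[i] > col) break;
    if (cols[i] == col) {           /* hit: add to or overwrite the entry */
      if (addv == ADD_VALUES) vals[i] += value; else vals[i] = value;
      return;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {   /* miss: shift later entries up one slot */
    cols[ii+1] = cols[ii];
    vals[ii+1] = vals[ii];
  }
  cols[i] = col; vals[i] = value; (*nrow)++;
}
#endif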
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
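/*
  Illustrative note (not part of the original source): MatSetValuesRow_MPIAIJ()
  assumes the caller supplies one full row of values ordered by global column,
  so v splits into three contiguous segments:

      [ B entries left of the diagonal block | A (diagonal block) entries | B entries right of it ]

  The loop above computes l, the length of the first segment, and the three
  PetscMemcpy() calls copy each segment into its block.
*/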
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros above */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
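/*
  Illustrative sketch (not part of the original source): a typical user-level
  calling sequence that reaches MatSetValues_MPIAIJ() above.  Rows owned by
  this process go straight into the A/B blocks; off-process rows are stashed
  and communicated during assembly.  The function name ExampleAssembly and
  the sizes used are hypothetical.
*/
#if 0
static PetscErrorCode ExampleAssembly(void)
{
  Mat            M;
  PetscInt       row = 0,cols[2] = {0,5};
  PetscScalar    vals[2] = {1.0,2.0};
  PetscErrorCode ierr;

  ierr = MatCreate(PETSC_COMM_WORLD,&M);CHKERRQ(ierr);
  ierr = MatSetSizes(M,PETSC_DECIDE,PETSC_DECIDE,10,10);CHKERRQ(ierr);
  ierr = MatSetType(M,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(M,2,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatSetValues(M,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDestroy(M);CHKERRQ(ierr);
  return 0;
}
#endif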
237 
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatGetValues_MPIAIJ"
241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
242 {
243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
244   PetscErrorCode ierr;
245   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
246   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
247 
248   PetscFunctionBegin;
249   for (i=0; i<m; i++) {
250     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
251     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
252     if (idxm[i] >= rstart && idxm[i] < rend) {
253       row = idxm[i] - rstart;
254       for (j=0; j<n; j++) {
255         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
256         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
257         if (idxn[j] >= cstart && idxn[j] < cend){
258           col = idxn[j] - cstart;
259           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
260         } else {
261           if (!aij->colmap) {
262             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
263           }
264 #if defined (PETSC_USE_CTABLE)
265           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
266           col --;
267 #else
268           col = aij->colmap[idxn[j]] - 1;
269 #endif
270           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
271           else {
272             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
273           }
274         }
275       }
276     } else {
277       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
278     }
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   if (aij->donotstash) {
294     PetscFunctionReturn(0);
295   }
296 
297   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
298   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
299   if (addv == (ADD_VALUES|INSERT_VALUES)) {
300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
301   }
302   mat->insertmode = addv; /* in case this processor had no cache */
303 
304   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
305   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
306   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
313 {
314   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
315   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data;
316   PetscErrorCode ierr;
317   PetscMPIInt    n;
318   PetscInt       i,j,rstart,ncols,flg;
319   PetscInt       *row,*col,other_disassembled;
320   PetscScalar    *val;
321   InsertMode     addv = mat->insertmode;
322 
323   /* do not use 'b = (Mat_SeqAIJ *)aij->B->data' as B can be reset in disassembly */
324   PetscFunctionBegin;
325   if (!aij->donotstash) {
326     while (1) {
327       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
328       if (!flg) break;
329 
330       for (i=0; i<n;) {
331         /* Now identify the consecutive vals belonging to the same row */
332         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
333         if (j < n) ncols = j-i;
334         else       ncols = n-i;
335         /* Now assemble all these values with a single function call */
336         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
337         i = j;
338       }
339     }
340     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
341   }
342   a->compressedrow.use     = PETSC_FALSE;
343   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
344   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
345 
346   /* determine if any processor has disassembled, if so we must
347      also disassemble ourselves, in order that we may reassemble. */
348   /*
349      if the nonzero structure of submatrix B cannot change then we know that
350      no processor disassembled, and thus we can skip this step
351   */
352   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
353     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
354     if (mat->was_assembled && !other_disassembled) {
355       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
356     }
357   }
358   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
359     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
360   }
361   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
362   ((Mat_SeqAIJ *)aij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
363   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
364   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
365 
366   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
367   aij->rowvalues = 0;
368 
369   /* used by MatAXPY() */
370   a->xtoy = 0; ((Mat_SeqAIJ *)aij->B->data)->xtoy = 0;  /* b->xtoy = 0 */
371   a->XtoY = 0; ((Mat_SeqAIJ *)aij->B->data)->XtoY = 0;  /* b->XtoY = 0 */
372 
373   PetscFunctionReturn(0);
374 }
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
378 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
379 {
380   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
381   PetscErrorCode ierr;
382 
383   PetscFunctionBegin;
384   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
385   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatZeroRows_MPIAIJ"
391 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
392 {
393   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
394   PetscErrorCode ierr;
395   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
396   PetscInt       i,*owners = A->rmap.range;
397   PetscInt       *nprocs,j,idx,nsends,row;
398   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
399   PetscInt       *rvalues,count,base,slen,*source;
400   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
401   MPI_Comm       comm = A->comm;
402   MPI_Request    *send_waits,*recv_waits;
403   MPI_Status     recv_status,*send_status;
404 #if defined(PETSC_DEBUG)
405   PetscTruth     found = PETSC_FALSE;
406 #endif
407 
408   PetscFunctionBegin;
409   /*  first count number of contributors to each processor */
410   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
411   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
412   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
413   j = 0;
414   for (i=0; i<N; i++) {
415     if (lastidx > (idx = rows[i])) j = 0;
416     lastidx = idx;
417     for (; j<size; j++) {
418       if (idx >= owners[j] && idx < owners[j+1]) {
419         nprocs[2*j]++;
420         nprocs[2*j+1] = 1;
421         owner[i] = j;
422 #if defined(PETSC_DEBUG)
423         found = PETSC_TRUE;
424 #endif
425         break;
426       }
427     }
428 #if defined(PETSC_DEBUG)
429     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
430     found = PETSC_FALSE;
431 #endif
432   }
433   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
434 
435   /* inform other processors of number of messages and max length */
436   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
437 
438   /* post receives:   */
439   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
440   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
441   for (i=0; i<nrecvs; i++) {
442     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
443   }
444 
445   /* do sends:
446       1) starts[i] gives the starting index in svalues for stuff going to
447          the ith processor
448   */
449   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
450   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
451   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
452   starts[0] = 0;
453   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
454   for (i=0; i<N; i++) {
455     svalues[starts[owner[i]]++] = rows[i];
456   }
457 
458   starts[0] = 0;
459   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
460   count = 0;
461   for (i=0; i<size; i++) {
462     if (nprocs[2*i+1]) {
463       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
464     }
465   }
466   ierr = PetscFree(starts);CHKERRQ(ierr);
467 
468   base = owners[rank];
469 
470   /*  wait on receives */
471   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
472   source = lens + nrecvs;
473   count  = nrecvs; slen = 0;
474   while (count) {
475     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
476     /* unpack receives into our local space */
477     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
478     source[imdex]  = recv_status.MPI_SOURCE;
479     lens[imdex]    = n;
480     slen          += n;
481     count--;
482   }
483   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
484 
485   /* move the data into the send scatter */
486   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
487   count = 0;
488   for (i=0; i<nrecvs; i++) {
489     values = rvalues + i*nmax;
490     for (j=0; j<lens[i]; j++) {
491       lrows[count++] = values[j] - base;
492     }
493   }
494   ierr = PetscFree(rvalues);CHKERRQ(ierr);
495   ierr = PetscFree(lens);CHKERRQ(ierr);
496   ierr = PetscFree(owner);CHKERRQ(ierr);
497   ierr = PetscFree(nprocs);CHKERRQ(ierr);
498 
499   /* actually zap the local rows */
500   /*
501         Zero the required rows. If the "diagonal block" of the matrix
502      is square and the user wishes to set the diagonal we use separate
503      code so that MatSetValues() is not called for each diagonal entry (which
504      could allocate new memory, triggering many mallocs and slowing things down).
505 
506        Contributed by: Matthew Knepley
507   */
508   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
509   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
510   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
511     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
512   } else if (diag != 0.0) {
513     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
514     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
515       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
516 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
517     }
518     for (i = 0; i < slen; i++) {
519       row  = lrows[i] + rstart;
520       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
521     }
522     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
523     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
524   } else {
525     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
526   }
527   ierr = PetscFree(lrows);CHKERRQ(ierr);
528 
529   /* wait on sends */
530   if (nsends) {
531     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
532     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
533     ierr = PetscFree(send_status);CHKERRQ(ierr);
534   }
535   ierr = PetscFree(send_waits);CHKERRQ(ierr);
536   ierr = PetscFree(svalues);CHKERRQ(ierr);
537 
538   PetscFunctionReturn(0);
539 }
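/*
  Illustrative sketch (not part of the original source): the owner search used
  at the top of MatZeroRows_MPIAIJ() above.  Each global row index is mapped to
  the process whose ownership range [owners[j],owners[j+1]) contains it; the
  real loop additionally remembers its last position (lastidx/j) so that sorted
  input is classified in linear time.  The helper name FindOwner is hypothetical.
*/
#if 0
static PetscMPIInt FindOwner(PetscInt idx,const PetscInt owners[],PetscMPIInt size)
{
  PetscMPIInt j;
  for (j=0; j<size; j++) {
    if (idx >= owners[j] && idx < owners[j+1]) return j;
  }
  return -1; /* index out of range */
}
#endif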
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatMult_MPIAIJ"
543 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
544 {
545   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
546   PetscErrorCode ierr;
547   PetscInt       nt;
548 
549   PetscFunctionBegin;
550   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
551   if (nt != A->cmap.n) {
552     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
553   }
554   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
555   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
556   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultAdd_MPIAIJ"
563 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567 
568   PetscFunctionBegin;
569   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
570   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
571   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
572   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
578 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
579 {
580   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
581   PetscErrorCode ierr;
582   PetscTruth     merged;
583 
584   PetscFunctionBegin;
585   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
586   /* do nondiagonal part */
587   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
588   if (!merged) {
589     /* send it on its way */
590     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
591     /* do local part */
592     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
593     /* receive remote parts: note this assumes the values are not actually */
594     /* added in yy until the next line */
595     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
596   } else {
597     /* do local part */
598     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
599     /* send it on its way */
600     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
601     /* values were actually received in the Begin() but we still must call this no-op */
602     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
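/*
  Illustrative note (not part of the original source): both branches above
  assemble yy = A_d^T xx + (reverse scatter-add of B_o^T xx), where A_d is the
  local diagonal block and B_o the off-diagonal block.  The only difference is
  whether the local multiply can be overlapped with the reverse scatter
  (unmerged case) or must complete before the scatter begins (merged case).
*/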
606 
607 EXTERN_C_BEGIN
608 #undef __FUNCT__
609 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
610 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
611 {
612   MPI_Comm       comm;
613   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
614   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
615   IS             Me,Notme;
616   PetscErrorCode ierr;
617   PetscInt       M,N,first,last,*notme,i;
618   PetscMPIInt    size;
619 
620   PetscFunctionBegin;
621 
622   /* Easy test: symmetric diagonal block */
623   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
624   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
625   if (!*f) PetscFunctionReturn(0);
626   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
628   if (size == 1) PetscFunctionReturn(0);
629 
630   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
631   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
632   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
633   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
634   for (i=0; i<first; i++) notme[i] = i;
635   for (i=last; i<M; i++) notme[i-last+first] = i;
636   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
637   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
638   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
639   Aoff = Aoffs[0];
640   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
641   Boff = Boffs[0];
642   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
643   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
644   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
645   ierr = ISDestroy(Me);CHKERRQ(ierr);
646   ierr = ISDestroy(Notme);CHKERRQ(ierr);
647 
648   PetscFunctionReturn(0);
649 }
650 EXTERN_C_END
651 
652 #undef __FUNCT__
653 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
654 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
655 {
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657   PetscErrorCode ierr;
658 
659   PetscFunctionBegin;
660   /* do nondiagonal part */
661   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
662   /* send it on its way */
663   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
664   /* do local part */
665   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
666   /* receive remote parts */
667   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 /*
672   This only works correctly for square matrices where the subblock A->A is the
673    diagonal block
674 */
675 #undef __FUNCT__
676 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
677 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
678 {
679   PetscErrorCode ierr;
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681 
682   PetscFunctionBegin;
683   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
684   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
685     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
686   }
687   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatScale_MPIAIJ"
693 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
694 {
695   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
696   PetscErrorCode ierr;
697 
698   PetscFunctionBegin;
699   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
700   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatDestroy_MPIAIJ"
706 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
707 {
708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
709   PetscErrorCode ierr;
710 
711   PetscFunctionBegin;
712 #if defined(PETSC_USE_LOG)
713   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
714 #endif
715   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
716   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
717   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
718 #if defined (PETSC_USE_CTABLE)
719   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
720 #else
721   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
722 #endif
723   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
724   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
725   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
726   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
727   ierr = PetscFree(aij);CHKERRQ(ierr);
728 
729   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
730   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatView_MPIAIJ_Binary"
741 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
742 {
743   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
744   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
745   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
746   PetscErrorCode    ierr;
747   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
748   int               fd;
749   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
750   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
751   PetscScalar       *column_values;
752 
753   PetscFunctionBegin;
754   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
755   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
756   nz   = A->nz + B->nz;
757   if (!rank) {
758     header[0] = MAT_FILE_COOKIE;
759     header[1] = mat->rmap.N;
760     header[2] = mat->cmap.N;
761     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
762     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
763     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
764     /* get largest number of rows any processor has */
765     rlen = mat->rmap.n;
766     range = mat->rmap.range;
767     for (i=1; i<size; i++) {
768       rlen = PetscMax(rlen,range[i+1] - range[i]);
769     }
770   } else {
771     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
772     rlen = mat->rmap.n;
773   }
774 
775   /* load up the local row counts */
776   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
777   for (i=0; i<mat->rmap.n; i++) {
778     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
779   }
780 
781   /* store the row lengths to the file */
782   if (!rank) {
783     MPI_Status status;
784     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
785     for (i=1; i<size; i++) {
786       rlen = range[i+1] - range[i];
787       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
788       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
789     }
790   } else {
791     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
792   }
793   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
794 
795   /* load up the local column indices */
796   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
797   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
798   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
799   cnt  = 0;
800   for (i=0; i<mat->rmap.n; i++) {
801     for (j=B->i[i]; j<B->i[i+1]; j++) {
802       if ( (col = garray[B->j[j]]) > cstart) break;
803       column_indices[cnt++] = col;
804     }
805     for (k=A->i[i]; k<A->i[i+1]; k++) {
806       column_indices[cnt++] = A->j[k] + cstart;
807     }
808     for (; j<B->i[i+1]; j++) {
809       column_indices[cnt++] = garray[B->j[j]];
810     }
811   }
812   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
813 
814   /* store the column indices to the file */
815   if (!rank) {
816     MPI_Status status;
817     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
818     for (i=1; i<size; i++) {
819       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
820       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
821       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
823     }
824   } else {
825     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
826     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
827   }
828   ierr = PetscFree(column_indices);CHKERRQ(ierr);
829 
830   /* load up the local column values */
831   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
832   cnt  = 0;
833   for (i=0; i<mat->rmap.n; i++) {
834     for (j=B->i[i]; j<B->i[i+1]; j++) {
835       if ( garray[B->j[j]] > cstart) break;
836       column_values[cnt++] = B->a[j];
837     }
838     for (k=A->i[i]; k<A->i[i+1]; k++) {
839       column_values[cnt++] = A->a[k];
840     }
841     for (; j<B->i[i+1]; j++) {
842       column_values[cnt++] = B->a[j];
843     }
844   }
845   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
846 
847   /* store the column values to the file */
848   if (!rank) {
849     MPI_Status status;
850     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
851     for (i=1; i<size; i++) {
852       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
853       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
854       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
856     }
857   } else {
858     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
859     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
860   }
861   ierr = PetscFree(column_values);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
867 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
868 {
869   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
870   PetscErrorCode    ierr;
871   PetscMPIInt       rank = aij->rank,size = aij->size;
872   PetscTruth        isdraw,iascii,isbinary;
873   PetscViewer       sviewer;
874   PetscViewerFormat format;
875 
876   PetscFunctionBegin;
877   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
878   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
880   if (iascii) {
881     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
882     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
883       MatInfo    info;
884       PetscTruth inodes;
885 
886       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
889       if (!inodes) {
890         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
891 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
892       } else {
893         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
894 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
895       }
896       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
897       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
898       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
900       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
901       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
902       PetscFunctionReturn(0);
903     } else if (format == PETSC_VIEWER_ASCII_INFO) {
904       PetscInt   inodecount,inodelimit,*inodes;
905       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
906       if (inodes) {
907         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
908       } else {
909         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
910       }
911       PetscFunctionReturn(0);
912     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
913       PetscFunctionReturn(0);
914     }
915   } else if (isbinary) {
916     if (size == 1) {
917       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
918       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
919     } else {
920       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
921     }
922     PetscFunctionReturn(0);
923   } else if (isdraw) {
924     PetscDraw  draw;
925     PetscTruth isnull;
926     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
927     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
928   }
929 
930   if (size == 1) {
931     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
932     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
933   } else {
934     /* assemble the entire matrix onto first processor. */
935     Mat         A;
936     Mat_SeqAIJ  *Aloc;
937     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
938     PetscScalar *a;
939 
940     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
941     if (!rank) {
942       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
943     } else {
944       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
945     }
946     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
947     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
948     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
949     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
950 
951     /* copy over the A part */
952     Aloc = (Mat_SeqAIJ*)aij->A->data;
953     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
954     row = mat->rmap.rstart;
955     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
956     for (i=0; i<m; i++) {
957       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
958       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
959     }
960     aj = Aloc->j;
961     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
962 
963     /* copy over the B part */
964     Aloc = (Mat_SeqAIJ*)aij->B->data;
965     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
966     row  = mat->rmap.rstart;
967     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
968     ct   = cols;
969     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
970     for (i=0; i<m; i++) {
971       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
972       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
973     }
974     ierr = PetscFree(ct);CHKERRQ(ierr);
975     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
976     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
977     /*
978        Everyone has to participate in drawing the matrix since the graphics waits are
979        synchronized across all processors that share the PetscDraw object
980     */
981     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
982     if (!rank) {
983       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
984       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
985     }
986     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
987     ierr = MatDestroy(A);CHKERRQ(ierr);
988   }
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatView_MPIAIJ"
994 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
995 {
996   PetscErrorCode ierr;
997   PetscTruth     iascii,isdraw,issocket,isbinary;
998 
999   PetscFunctionBegin;
1000   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1001   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1002   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1003   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1004   if (iascii || isdraw || isbinary || issocket) {
1005     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1006   } else {
1007     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatRelax_MPIAIJ"
1016 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1017 {
1018   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1019   PetscErrorCode ierr;
1020   Vec            bb1;
1021 
1022   PetscFunctionBegin;
1023   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1024 
1025   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1026 
1027   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1028     if (flag & SOR_ZERO_INITIAL_GUESS) {
1029       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1030       its--;
1031     }
1032 
1033     while (its--) {
1034       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1035       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1036 
1037       /* update rhs: bb1 = bb - B*x */
1038       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1039       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1040 
1041       /* local sweep */
1042       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1043       CHKERRQ(ierr);
1044     }
1045   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1046     if (flag & SOR_ZERO_INITIAL_GUESS) {
1047       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1048       its--;
1049     }
1050     while (its--) {
1051       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1052       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1053 
1054       /* update rhs: bb1 = bb - B*x */
1055       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1056       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1057 
1058       /* local sweep */
1059       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1060       CHKERRQ(ierr);
1061     }
1062   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1063     if (flag & SOR_ZERO_INITIAL_GUESS) {
1064       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1065       its--;
1066     }
1067     while (its--) {
1068       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1069       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1070 
1071       /* update rhs: bb1 = bb - B*x */
1072       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1073       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1074 
1075       /* local sweep */
1076       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1077       CHKERRQ(ierr);
1078     }
1079   } else {
1080     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1081   }
1082 
1083   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
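/*
  Illustrative note (not part of the original source): each outer iteration
  above refreshes the ghost values of xx, forms the modified right-hand side
  bb1 = bb - B*x by negating the scattered ghost vector and applying the
  off-diagonal block with a multadd call, and then runs `lits' local sweeps
  of the requested SOR variant on the diagonal block.
*/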
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatPermute_MPIAIJ"
1089 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1090 {
1091   MPI_Comm       comm,pcomm;
1092   PetscInt       first,local_size,nrows,*rows;
1093   int            ntids;
1094   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1095   PetscErrorCode ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1099   /* make a collective version of 'rowp' */
1100   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1101   if (pcomm==comm) {
1102     crowp = rowp;
1103   } else {
1104     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1105     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1106     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1107     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1108   }
1109   /* collect the global row permutation and invert it */
1110   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1111   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1112   if (pcomm!=comm) {
1113     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1114   }
1115   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1116   /* get the local target indices */
1117   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1118   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1119   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1120   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1121   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1122   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1123   /* the column permutation is so much easier;
1124      make a local version of 'colp' and invert it */
1125   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1126   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1127   if (ntids==1) {
1128     lcolp = colp;
1129   } else {
1130     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1131     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1132     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1133   }
1134   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1135   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1136   if (ntids>1) {
1137     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1138     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1139   }
1140   /* now we just get the submatrix */
1141   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1142   /* clean up */
1143   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1144   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1150 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1153   Mat            A = mat->A,B = mat->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = 1.0;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   }
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   info->rows_global       = (double)matin->rmap.N;
1190   info->columns_global    = (double)matin->cmap.N;
1191   info->rows_local        = (double)matin->rmap.n;
1192   info->columns_local     = (double)matin->cmap.n;
1193 
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatSetOption_MPIAIJ"
1199 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1200 {
1201   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1202   PetscErrorCode ierr;
1203 
1204   PetscFunctionBegin;
1205   switch (op) {
1206   case MAT_NO_NEW_NONZERO_LOCATIONS:
1207   case MAT_YES_NEW_NONZERO_LOCATIONS:
1208   case MAT_COLUMNS_UNSORTED:
1209   case MAT_COLUMNS_SORTED:
1210   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1211   case MAT_KEEP_ZEROED_ROWS:
1212   case MAT_NEW_NONZERO_LOCATION_ERR:
1213   case MAT_USE_INODES:
1214   case MAT_DO_NOT_USE_INODES:
1215   case MAT_IGNORE_ZERO_ENTRIES:
1216     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1217     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1218     break;
1219   case MAT_ROW_ORIENTED:
1220     a->roworiented = PETSC_TRUE;
1221     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1222     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1223     break;
1224   case MAT_ROWS_SORTED:
1225   case MAT_ROWS_UNSORTED:
1226   case MAT_YES_NEW_DIAGONALS:
1227     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1228     break;
1229   case MAT_COLUMN_ORIENTED:
1230     a->roworiented = PETSC_FALSE;
1231     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1232     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1233     break;
1234   case MAT_IGNORE_OFF_PROC_ENTRIES:
1235     a->donotstash = PETSC_TRUE;
1236     break;
1237   case MAT_NO_NEW_DIAGONALS:
1238     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1239   case MAT_SYMMETRIC:
1240   case MAT_STRUCTURALLY_SYMMETRIC:
1241   case MAT_HERMITIAN:
1242   case MAT_SYMMETRY_ETERNAL:
1243     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1244     break;
1245   case MAT_NOT_SYMMETRIC:
1246   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1247   case MAT_NOT_HERMITIAN:
1248   case MAT_NOT_SYMMETRY_ETERNAL:
1249     break;
1250   default:
1251     SETERRQ(PETSC_ERR_SUP,"unknown option");
1252   }
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatGetRow_MPIAIJ"
1258 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1259 {
1260   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1261   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1262   PetscErrorCode ierr;
1263   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1264   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1265   PetscInt       *cmap,*idx_p;
1266 
1267   PetscFunctionBegin;
1268   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1269   mat->getrowactive = PETSC_TRUE;
1270 
1271   if (!mat->rowvalues && (idx || v)) {
1272     /*
1273         allocate enough space to hold information from the longest row.
1274     */
1275     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1276     PetscInt     max = 1,tmp;
1277     for (i=0; i<matin->rmap.n; i++) {
1278       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1279       if (max < tmp) { max = tmp; }
1280     }
1281     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1282     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1283   }
1284 
1285   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1286   lrow = row - rstart;
1287 
1288   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1289   if (!v)   {pvA = 0; pvB = 0;}
1290   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1291   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1292   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1293   nztot = nzA + nzB;
1294 
1295   cmap  = mat->garray;
1296   if (v  || idx) {
1297     if (nztot) {
1298       /* Sort by increasing column numbers, assuming A and B already sorted */
1299       PetscInt imark = -1;
1300       if (v) {
1301         *v = v_p = mat->rowvalues;
1302         for (i=0; i<nzB; i++) {
1303           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1304           else break;
1305         }
1306         imark = i;
1307         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1308         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1309       }
1310       if (idx) {
1311         *idx = idx_p = mat->rowindices;
1312         if (imark > -1) {
1313           for (i=0; i<imark; i++) {
1314             idx_p[i] = cmap[cworkB[i]];
1315           }
1316         } else {
1317           for (i=0; i<nzB; i++) {
1318             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1319             else break;
1320           }
1321           imark = i;
1322         }
1323         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1324         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1325       }
1326     } else {
1327       if (idx) *idx = 0;
1328       if (v)   *v   = 0;
1329     }
1330   }
1331   *nz = nztot;
1332   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1333   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
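/*
  Illustrative note (not part of the original source): the merge above relies
  on the column indices of A and B each being sorted.  B's columns that map
  below cstart precede all of A's columns (which lie in [cstart,cend)), and
  B's remaining columns follow them, so a single split point imark suffices
  to produce one fully sorted row.
*/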
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1339 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1340 {
1341   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1342 
1343   PetscFunctionBegin;
1344   if (!aij->getrowactive) {
1345     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1346   }
1347   aij->getrowactive = PETSC_FALSE;
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatNorm_MPIAIJ"
1353 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1354 {
1355   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1356   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1357   PetscErrorCode ierr;
1358   PetscInt       i,j,cstart = mat->cmap.rstart;
1359   PetscReal      sum = 0.0;
1360   PetscScalar    *v;
1361 
1362   PetscFunctionBegin;
1363   if (aij->size == 1) {
1364     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1365   } else {
1366     if (type == NORM_FROBENIUS) {
1367       v = amat->a;
1368       for (i=0; i<amat->nz; i++) {
1369 #if defined(PETSC_USE_COMPLEX)
1370         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1371 #else
1372         sum += (*v)*(*v); v++;
1373 #endif
1374       }
1375       v = bmat->a;
1376       for (i=0; i<bmat->nz; i++) {
1377 #if defined(PETSC_USE_COMPLEX)
1378         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1379 #else
1380         sum += (*v)*(*v); v++;
1381 #endif
1382       }
1383       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1384       *norm = sqrt(*norm);
1385     } else if (type == NORM_1) { /* max column norm */
1386       PetscReal *tmp,*tmp2;
1387       PetscInt    *jj,*garray = aij->garray;
1388       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1389       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1390       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1391       *norm = 0.0;
1392       v = amat->a; jj = amat->j;
1393       for (j=0; j<amat->nz; j++) {
1394         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1395       }
1396       v = bmat->a; jj = bmat->j;
1397       for (j=0; j<bmat->nz; j++) {
1398         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1399       }
1400       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1401       for (j=0; j<mat->cmap.N; j++) {
1402         if (tmp2[j] > *norm) *norm = tmp2[j];
1403       }
1404       ierr = PetscFree(tmp);CHKERRQ(ierr);
1405       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1406     } else if (type == NORM_INFINITY) { /* max row norm */
1407       PetscReal ntemp = 0.0;
1408       for (j=0; j<aij->A->rmap.n; j++) {
1409         v = amat->a + amat->i[j];
1410         sum = 0.0;
1411         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1412           sum += PetscAbsScalar(*v); v++;
1413         }
1414         v = bmat->a + bmat->i[j];
1415         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1416           sum += PetscAbsScalar(*v); v++;
1417         }
1418         if (sum > ntemp) ntemp = sum;
1419       }
1420       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1421     } else {
1422       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1423     }
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatTranspose_MPIAIJ"
1430 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1431 {
1432   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1433   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1434   PetscErrorCode ierr;
1435   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1436   Mat            B;
1437   PetscScalar    *array;
1438 
1439   PetscFunctionBegin;
1440   if (!matout && M != N) {
1441     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1442   }
1443 
1444   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1445   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1446   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1447   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1448 
1449   /* copy over the A part */
1450   Aloc = (Mat_SeqAIJ*)a->A->data;
1451   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1452   row = A->rmap.rstart;
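  /* temporarily shift the local column indices of the diagonal block into the
     global numbering; each local row of A is then inserted as a column of B,
     and the shift is undone once the insertions are done */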
  for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart;}
1454   for (i=0; i<m; i++) {
1455     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1456     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1457   }
1458   aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart;}
1460 
1461   /* copy over the B part */
1462   Aloc = (Mat_SeqAIJ*)a->B->data;
1463   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1464   row  = A->rmap.rstart;
1465   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1466   ct   = cols;
1467   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1468   for (i=0; i<m; i++) {
1469     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1470     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1471   }
1472   ierr = PetscFree(ct);CHKERRQ(ierr);
1473   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1474   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1475   if (matout) {
1476     *matout = B;
1477   } else {
1478     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1479   }
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 #undef __FUNCT__
1484 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1485 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1486 {
1487   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1488   Mat            a = aij->A,b = aij->B;
1489   PetscErrorCode ierr;
1490   PetscInt       s1,s2,s3;
1491 
1492   PetscFunctionBegin;
1493   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1494   if (rr) {
1495     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1496     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1497     /* Overlap communication with computation. */
1498     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1499   }
1500   if (ll) {
1501     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1502     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1503     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1504   }
1505   /* scale  the diagonal block */
1506   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1507 
1508   if (rr) {
1509     /* Do a scatter end and then right scale the off-diagonal block */
1510     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1511     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1512   }
1513 
1514   PetscFunctionReturn(0);
1515 }
1516 
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1520 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1521 {
1522   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1523   PetscErrorCode ierr;
1524 
1525   PetscFunctionBegin;
1526   if (!a->rank) {
1527     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1534 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1535 {
1536   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1537   PetscErrorCode ierr;
1538 
1539   PetscFunctionBegin;
1540   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1541   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1542   PetscFunctionReturn(0);
1543 }
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1546 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1547 {
1548   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1549   PetscErrorCode ierr;
1550 
1551   PetscFunctionBegin;
1552   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatEqual_MPIAIJ"
1558 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1559 {
1560   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1561   Mat            a,b,c,d;
1562   PetscTruth     flg;
1563   PetscErrorCode ierr;
1564 
1565   PetscFunctionBegin;
1566   a = matA->A; b = matA->B;
1567   c = matB->A; d = matB->B;
1568 
1569   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1570   if (flg) {
1571     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1572   }
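  /* the matrices are equal only if both local blocks match on every process */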
1573   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatCopy_MPIAIJ"
1579 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1580 {
1581   PetscErrorCode ierr;
1582   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1583   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1584 
1585   PetscFunctionBegin;
1586   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1587   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* Because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
       directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
       could be provided by first uncompressing the a->B matrices and then copying the
       submatrices. */
1593     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1594   } else {
1595     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1596     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1597   }
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1603 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1604 {
1605   PetscErrorCode ierr;
1606 
1607   PetscFunctionBegin;
1608   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1609   PetscFunctionReturn(0);
1610 }
1611 
1612 #include "petscblaslapack.h"
1613 #undef __FUNCT__
1614 #define __FUNCT__ "MatAXPY_MPIAIJ"
1615 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1616 {
1617   PetscErrorCode ierr;
1618   PetscInt       i;
1619   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1620   PetscBLASInt   bnz,one=1;
1621   Mat_SeqAIJ     *x,*y;
1622 
1623   PetscFunctionBegin;
1624   if (str == SAME_NONZERO_PATTERN) {
1625     PetscScalar alpha = a;
1626     x = (Mat_SeqAIJ *)xx->A->data;
1627     y = (Mat_SeqAIJ *)yy->A->data;
1628     bnz = (PetscBLASInt)x->nz;
1629     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1630     x = (Mat_SeqAIJ *)xx->B->data;
1631     y = (Mat_SeqAIJ *)yy->B->data;
1632     bnz = (PetscBLASInt)x->nz;
1633     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1634   } else if (str == SUBSET_NONZERO_PATTERN) {
1635     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1636 
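    /* the off-diagonal blocks of X and Y compress different sets of global
       columns, so build (and cache on Y) xtoy, a map from each nonzero of X's
       B block to its slot in Y's B block, then add the entries one by one */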
1637     x = (Mat_SeqAIJ *)xx->B->data;
1638     y = (Mat_SeqAIJ *)yy->B->data;
1639     if (y->xtoy && y->XtoY != xx->B) {
1640       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1641       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1642     }
1643     if (!y->xtoy) { /* get xtoy */
1644       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1645       y->XtoY = xx->B;
1646     }
1647     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1648   } else {
1649     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1650   }
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "MatConjugate_MPIAIJ"
1658 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1659 {
1660 #if defined(PETSC_USE_COMPLEX)
1661   PetscErrorCode ierr;
1662   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1663 
1664   PetscFunctionBegin;
1665   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1666   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1667 #else
1668   PetscFunctionBegin;
1669 #endif
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatRealPart_MPIAIJ"
1675 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1676 {
1677   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1678   PetscErrorCode ierr;
1679 
1680   PetscFunctionBegin;
1681   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1682   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1688 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1689 {
1690   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1691   PetscErrorCode ierr;
1692 
1693   PetscFunctionBegin;
1694   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1695   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 /* -------------------------------------------------------------------*/
1700 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1701        MatGetRow_MPIAIJ,
1702        MatRestoreRow_MPIAIJ,
1703        MatMult_MPIAIJ,
1704 /* 4*/ MatMultAdd_MPIAIJ,
1705        MatMultTranspose_MPIAIJ,
1706        MatMultTransposeAdd_MPIAIJ,
1707        0,
1708        0,
1709        0,
1710 /*10*/ 0,
1711        0,
1712        0,
1713        MatRelax_MPIAIJ,
1714        MatTranspose_MPIAIJ,
1715 /*15*/ MatGetInfo_MPIAIJ,
1716        MatEqual_MPIAIJ,
1717        MatGetDiagonal_MPIAIJ,
1718        MatDiagonalScale_MPIAIJ,
1719        MatNorm_MPIAIJ,
1720 /*20*/ MatAssemblyBegin_MPIAIJ,
1721        MatAssemblyEnd_MPIAIJ,
1722        0,
1723        MatSetOption_MPIAIJ,
1724        MatZeroEntries_MPIAIJ,
1725 /*25*/ MatZeroRows_MPIAIJ,
1726        0,
1727        0,
1728        0,
1729        0,
1730 /*30*/ MatSetUpPreallocation_MPIAIJ,
1731        0,
1732        0,
1733        0,
1734        0,
1735 /*35*/ MatDuplicate_MPIAIJ,
1736        0,
1737        0,
1738        0,
1739        0,
1740 /*40*/ MatAXPY_MPIAIJ,
1741        MatGetSubMatrices_MPIAIJ,
1742        MatIncreaseOverlap_MPIAIJ,
1743        MatGetValues_MPIAIJ,
1744        MatCopy_MPIAIJ,
1745 /*45*/ MatPrintHelp_MPIAIJ,
1746        MatScale_MPIAIJ,
1747        0,
1748        0,
1749        0,
1750 /*50*/ MatSetBlockSize_MPIAIJ,
1751        0,
1752        0,
1753        0,
1754        0,
1755 /*55*/ MatFDColoringCreate_MPIAIJ,
1756        0,
1757        MatSetUnfactored_MPIAIJ,
1758        MatPermute_MPIAIJ,
1759        0,
1760 /*60*/ MatGetSubMatrix_MPIAIJ,
1761        MatDestroy_MPIAIJ,
1762        MatView_MPIAIJ,
1763        0,
1764        0,
1765 /*65*/ 0,
1766        0,
1767        0,
1768        0,
1769        0,
1770 /*70*/ 0,
1771        0,
1772        MatSetColoring_MPIAIJ,
1773 #if defined(PETSC_HAVE_ADIC)
1774        MatSetValuesAdic_MPIAIJ,
1775 #else
1776        0,
1777 #endif
1778        MatSetValuesAdifor_MPIAIJ,
1779 /*75*/ 0,
1780        0,
1781        0,
1782        0,
1783        0,
1784 /*80*/ 0,
1785        0,
1786        0,
1787        0,
1788 /*84*/ MatLoad_MPIAIJ,
1789        0,
1790        0,
1791        0,
1792        0,
1793        0,
1794 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1795        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1796        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1797        MatPtAP_Basic,
1798        MatPtAPSymbolic_MPIAIJ,
1799 /*95*/ MatPtAPNumeric_MPIAIJ,
1800        0,
1801        0,
1802        0,
1803        0,
1804 /*100*/0,
1805        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1806        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1807        MatConjugate_MPIAIJ,
1808        0,
1809 /*105*/MatSetValuesRow_MPIAIJ,
1810        MatRealPart_MPIAIJ,
1811        MatImaginaryPart_MPIAIJ};
1812 
1813 /* ----------------------------------------------------------------------------------------*/
1814 
1815 EXTERN_C_BEGIN
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1818 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1819 {
1820   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1821   PetscErrorCode ierr;
1822 
1823   PetscFunctionBegin;
1824   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1825   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 EXTERN_C_END
1829 
1830 EXTERN_C_BEGIN
1831 #undef __FUNCT__
1832 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1833 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1834 {
1835   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1836   PetscErrorCode ierr;
1837 
1838   PetscFunctionBegin;
1839   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1840   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1841   PetscFunctionReturn(0);
1842 }
1843 EXTERN_C_END
1844 
1845 #include "petscpc.h"
1846 EXTERN_C_BEGIN
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1849 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1850 {
1851   Mat_MPIAIJ     *b;
1852   PetscErrorCode ierr;
1853   PetscInt       i;
1854 
1855   PetscFunctionBegin;
1856   B->preallocated = PETSC_TRUE;
1857   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1858   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1859   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1860   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1861 
1862   B->rmap.bs = B->cmap.bs = 1;
1863   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1864   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1865   if (d_nnz) {
1866     for (i=0; i<B->rmap.n; i++) {
1867       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1868     }
1869   }
1870   if (o_nnz) {
1871     for (i=0; i<B->rmap.n; i++) {
1872       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1873     }
1874   }
1875   b = (Mat_MPIAIJ*)B->data;
1876 
1877   /* Explicitly create 2 MATSEQAIJ matrices. */
1878   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1879   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1880   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1881   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1882   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1883   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1884   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1885   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1886 
1887   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1888   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1889 
1890   PetscFunctionReturn(0);
1891 }
1892 EXTERN_C_END
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1896 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1897 {
1898   Mat            mat;
1899   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1900   PetscErrorCode ierr;
1901 
1902   PetscFunctionBegin;
1903   *newmat       = 0;
1904   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1905   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
1906   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1907   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1908   a    = (Mat_MPIAIJ*)mat->data;
1909 
1910   mat->factor       = matin->factor;
1911   mat->rmap.bs      = matin->rmap.bs;
1912   mat->assembled    = PETSC_TRUE;
1913   mat->insertmode   = NOT_SET_VALUES;
1914   mat->preallocated = PETSC_TRUE;
1915 
1916   a->size           = oldmat->size;
1917   a->rank           = oldmat->rank;
1918   a->donotstash     = oldmat->donotstash;
1919   a->roworiented    = oldmat->roworiented;
1920   a->rowindices     = 0;
1921   a->rowvalues      = 0;
1922   a->getrowactive   = PETSC_FALSE;
1923 
1924   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
1925   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
1926 
1927   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1928   if (oldmat->colmap) {
1929 #if defined (PETSC_USE_CTABLE)
1930     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1931 #else
1932     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1933     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1934     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1935 #endif
1936   } else a->colmap = 0;
1937   if (oldmat->garray) {
1938     PetscInt len;
1939     len  = oldmat->B->cmap.n;
1940     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1941     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1942     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1943   } else a->garray = 0;
1944 
1945   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1946   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1947   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1948   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1949   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1950   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1951   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1952   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1953   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1954   *newmat = mat;
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 #include "petscsys.h"
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatLoad_MPIAIJ"
1962 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1963 {
1964   Mat            A;
1965   PetscScalar    *vals,*svals;
1966   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1967   MPI_Status     status;
1968   PetscErrorCode ierr;
1969   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1970   PetscInt       i,nz,j,rstart,rend,mmax;
1971   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1972   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1973   PetscInt       cend,cstart,n,*rowners;
1974   int            fd;
1975 
1976   PetscFunctionBegin;
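  /* Process 0 reads the header, row lengths, column indices, and numerical values
     from the binary file and ships each piece to its owning process; every process
     then inserts its local rows with MatSetValues_MPIAIJ(). */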
1977   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1978   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1979   if (!rank) {
1980     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1981     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1982     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1983   }
1984 
1985   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1986   M = header[1]; N = header[2];
1987   /* determine ownership of all rows */
1988   m    = M/size + ((M % size) > rank);
1989   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1990   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1991 
1992   /* First process needs enough room for process with most rows */
1993   if (!rank) {
1994     mmax       = rowners[1];
1995     for (i=2; i<size; i++) {
1996       mmax = PetscMax(mmax,rowners[i]);
1997     }
1998   } else mmax = m;
1999 
2000   rowners[0] = 0;
2001   for (i=2; i<=size; i++) {
2002     mmax       = PetscMax(mmax,rowners[i]);
2003     rowners[i] += rowners[i-1];
2004   }
2005   rstart = rowners[rank];
2006   rend   = rowners[rank+1];
2007 
2008   /* distribute row lengths to all processors */
2009   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2010   if (!rank) {
2011     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2012     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2013     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2014     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2015     for (j=0; j<m; j++) {
2016       procsnz[0] += ourlens[j];
2017     }
2018     for (i=1; i<size; i++) {
2019       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2020       /* calculate the number of nonzeros on each processor */
2021       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2022         procsnz[i] += rowlengths[j];
2023       }
2024       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2025     }
2026     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2027   } else {
2028     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2029   }
2030 
2031   if (!rank) {
2032     /* determine max buffer needed and allocate it */
2033     maxnz = 0;
2034     for (i=0; i<size; i++) {
2035       maxnz = PetscMax(maxnz,procsnz[i]);
2036     }
2037     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2038 
2039     /* read in my part of the matrix column indices  */
2040     nz   = procsnz[0];
2041     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2042     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2043 
    /* read in everyone else's and ship it off */
2045     for (i=1; i<size; i++) {
2046       nz   = procsnz[i];
2047       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2048       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2049     }
2050     ierr = PetscFree(cols);CHKERRQ(ierr);
2051   } else {
2052     /* determine buffer space needed for message */
2053     nz = 0;
2054     for (i=0; i<m; i++) {
2055       nz += ourlens[i];
2056     }
2057     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2058 
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2063   }
2064 
2065   /* determine column ownership if matrix is not square */
2066   if (N != M) {
2067     n      = N/size + ((N % size) > rank);
2068     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2069     cstart = cend - n;
2070   } else {
2071     cstart = rstart;
2072     cend   = rend;
2073     n      = cend - cstart;
2074   }
2075 
2076   /* loop over local rows, determining number of off diagonal entries */
2077   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2078   jj = 0;
2079   for (i=0; i<m; i++) {
2080     for (j=0; j<ourlens[i]; j++) {
2081       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2082       jj++;
2083     }
2084   }
2085 
2086   /* create our matrix */
2087   for (i=0; i<m; i++) {
2088     ourlens[i] -= offlens[i];
2089   }
2090   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2091   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2092   ierr = MatSetType(A,type);CHKERRQ(ierr);
2093   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2094 
2095   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2096   for (i=0; i<m; i++) {
2097     ourlens[i] += offlens[i];
2098   }
2099 
2100   if (!rank) {
2101     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2102 
2103     /* read in my part of the matrix numerical values  */
2104     nz   = procsnz[0];
2105     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2106 
2107     /* insert into matrix */
2108     jj      = rstart;
2109     smycols = mycols;
2110     svals   = vals;
2111     for (i=0; i<m; i++) {
2112       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2113       smycols += ourlens[i];
2114       svals   += ourlens[i];
2115       jj++;
2116     }
2117 
2118     /* read in other processors and ship out */
2119     for (i=1; i<size; i++) {
2120       nz   = procsnz[i];
2121       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2122       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2123     }
2124     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2125   } else {
2126     /* receive numeric values */
2127     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2128 
    /* receive message of values */
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2133 
2134     /* insert into matrix */
2135     jj      = rstart;
2136     smycols = mycols;
2137     svals   = vals;
2138     for (i=0; i<m; i++) {
2139       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2140       smycols += ourlens[i];
2141       svals   += ourlens[i];
2142       jj++;
2143     }
2144   }
2145   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2146   ierr = PetscFree(vals);CHKERRQ(ierr);
2147   ierr = PetscFree(mycols);CHKERRQ(ierr);
2148   ierr = PetscFree(rowners);CHKERRQ(ierr);
2149 
2150   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2151   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2152   *newmat = A;
2153   PetscFunctionReturn(0);
2154 }
2155 
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2158 /*
    Not great since it makes two copies of the submatrix: first as a SeqAIJ
  locally, and then the end result by concatenating the local matrices.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2162 */
2163 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2164 {
2165   PetscErrorCode ierr;
2166   PetscMPIInt    rank,size;
2167   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2168   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2169   Mat            *local,M,Mreuse;
2170   PetscScalar    *vwork,*aa;
2171   MPI_Comm       comm = mat->comm;
2172   Mat_SeqAIJ     *aij;
2173 
2174 
2175   PetscFunctionBegin;
2176   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2177   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2178 
2179   if (call ==  MAT_REUSE_MATRIX) {
2180     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2181     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2182     local = &Mreuse;
2183     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2184   } else {
2185     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2186     Mreuse = *local;
2187     ierr   = PetscFree(local);CHKERRQ(ierr);
2188   }
2189 
2190   /*
2191       m - number of local rows
2192       n - number of columns (same on all processors)
2193       rstart - first row in new global matrix generated
2194   */
2195   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2196   if (call == MAT_INITIAL_MATRIX) {
2197     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2198     ii  = aij->i;
2199     jj  = aij->j;
2200 
2201     /*
2202         Determine the number of non-zeros in the diagonal and off-diagonal
2203         portions of the matrix in order to do correct preallocation
2204     */
2205 
2206     /* first get start and end of "diagonal" columns */
2207     if (csize == PETSC_DECIDE) {
2208       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2209       if (mglobal == n) { /* square matrix */
        nlocal = m;
2211       } else {
2212         nlocal = n/size + ((n % size) > rank);
2213       }
2214     } else {
2215       nlocal = csize;
2216     }
2217     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2218     rstart = rend - nlocal;
2219     if (rank == size - 1 && rend != n) {
2220       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2221     }
2222 
2223     /* next, compute all the lengths */
2224     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2225     olens = dlens + m;
2226     for (i=0; i<m; i++) {
2227       jend = ii[i+1] - ii[i];
2228       olen = 0;
2229       dlen = 0;
2230       for (j=0; j<jend; j++) {
2231         if (*jj < rstart || *jj >= rend) olen++;
2232         else dlen++;
2233         jj++;
2234       }
2235       olens[i] = olen;
2236       dlens[i] = dlen;
2237     }
2238     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2239     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2240     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2241     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2242     ierr = PetscFree(dlens);CHKERRQ(ierr);
2243   } else {
2244     PetscInt ml,nl;
2245 
2246     M = *newmat;
2247     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2248     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2249     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2250     /*
2251          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2252        rather than the slower MatSetValues().
2253     */
2254     M->was_assembled = PETSC_TRUE;
2255     M->assembled     = PETSC_FALSE;
2256   }
2257   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2258   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2259   ii  = aij->i;
2260   jj  = aij->j;
2261   aa  = aij->a;
2262   for (i=0; i<m; i++) {
2263     row   = rstart + i;
2264     nz    = ii[i+1] - ii[i];
2265     cwork = jj;     jj += nz;
2266     vwork = aa;     aa += nz;
2267     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2268   }
2269 
2270   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2271   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2272   *newmat = M;
2273 
2274   /* save submatrix used in processor for next request */
2275   if (call ==  MAT_INITIAL_MATRIX) {
2276     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2277     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2278   }
2279 
2280   PetscFunctionReturn(0);
2281 }
2282 
2283 EXTERN_C_BEGIN
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2286 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2287 {
2288   PetscInt       m,cstart, cend,j,nnz,i,d;
2289   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2290   const PetscInt *JJ;
2291   PetscScalar    *values;
2292   PetscErrorCode ierr;
2293 
2294   PetscFunctionBegin;
  if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0, it is %D",I[0]);
2296 
2297   B->rmap.bs = B->cmap.bs = 1;
2298   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2299   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2300   m      = B->rmap.n;
2301   cstart = B->cmap.rstart;
2302   cend   = B->cmap.rend;
2303   rstart = B->rmap.rstart;
2304 
2305   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2306   o_nnz = d_nnz + m;
2307 
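  /* for each local row count the columns that fall inside [cstart,cend) (the
     diagonal block) and those that fall outside (the off-diagonal block); the
     column indices of each row are assumed sorted, as this routine requires */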
2308   for (i=0; i<m; i++) {
2309     nnz     = I[i+1]- I[i];
2310     JJ      = J + I[i];
2311     nnz_max = PetscMax(nnz_max,nnz);
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2313     for (j=0; j<nnz; j++) {
2314       if (*JJ >= cstart) break;
2315       JJ++;
2316     }
2317     d = 0;
2318     for (; j<nnz; j++) {
2319       if (*JJ++ >= cend) break;
2320       d++;
2321     }
2322     d_nnz[i] = d;
2323     o_nnz[i] = nnz - d;
2324   }
2325   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2326   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2327 
2328   if (v) values = (PetscScalar*)v;
2329   else {
2330     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2331     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2332   }
2333 
2334   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2335   for (i=0; i<m; i++) {
2336     ii   = i + rstart;
2337     nnz  = I[i+1]- I[i];
2338     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2339   }
2340   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2342   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2343 
2344   if (!v) {
2345     ierr = PetscFree(values);CHKERRQ(ierr);
2346   }
2347   PetscFunctionReturn(0);
2348 }
2349 EXTERN_C_END
2350 
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2353 /*@
2354    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2355    (the default parallel PETSc format).
2356 
2357    Collective on MPI_Comm
2358 
2359    Input Parameters:
2360 +  B - the matrix
2361 .  i - the indices into j for the start of each local row (starts with zero)
2362 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2363 -  v - optional values in the matrix
2364 
2365    Level: developer
2366 
   Notes: this actually copies the values from i[], j[], and v[] to put them into PETSc's internal
     storage format. Thus changing the values in v[] after this call will not affect the matrix values.
2369 
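   Example usage, as a minimal sketch: the 4x4 matrix, its partitioning onto
   two processes, and the communicator comm are illustrative assumptions. On
   the process owning rows 0 and 1 one might write
.vb
     Mat         B;
     PetscInt    i[] = {0,2,4};            /* start of each local row in j[] */
     PetscInt    j[] = {0,1,1,2};          /* sorted global column indices   */
     PetscScalar v[] = {1.0,2.0,3.0,4.0};
     MatCreate(comm,&B);
     MatSetSizes(B,2,2,4,4);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve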
2370 .keywords: matrix, aij, compressed row, sparse, parallel
2371 
2372 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ,
2373           MatCreateSeqAIJWithArrays()
2374 @*/
2375 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2376 {
2377   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2378 
2379   PetscFunctionBegin;
2380   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2381   if (f) {
2382     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2383   }
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2389 /*@C
2390    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2391    (the default parallel PETSc format).  For good matrix assembly performance
2392    the user should preallocate the matrix storage by setting the parameters
2393    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2394    performance can be increased by more than a factor of 50.
2395 
2396    Collective on MPI_Comm
2397 
2398    Input Parameters:
2399 +  A - the matrix
2400 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2401            (same value is used for all local rows)
2402 .  d_nnz - array containing the number of nonzeros in the various rows of the
2403            DIAGONAL portion of the local submatrix (possibly different for each row)
2404            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
2406            You must leave room for the diagonal entry even if it is zero.
2407 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2408            submatrix (same value is used for all local rows).
2409 -  o_nnz - array containing the number of nonzeros in the various rows of the
2410            OFF-DIAGONAL portion of the local submatrix (possibly different for
2411            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2412            structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.
2414 
   If the *_nnz parameter is given, then the *_nz parameter is ignored.
2416 
2417    The AIJ format (also called the Yale sparse matrix format or
2418    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2419    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2420 
2421    The parallel matrix is partitioned such that the first m0 rows belong to
2422    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
2424 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix obtained by extracting the part corresponding to the
   rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the first
   row and r2 the last row belonging to the processor. This is a square mxm
   matrix. The remaining portion of the local submatrix (of size m x (N-m))
   constitutes the OFF-DIAGONAL portion.
2431 
   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2433 
2434    Example usage:
2435 
2436    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2438    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2439    as follows:
2440 
2441 .vb
2442             1  2  0  |  0  3  0  |  0  4
2443     Proc0   0  5  6  |  7  0  0  |  8  0
2444             9  0 10  | 11  0  0  | 12  0
2445     -------------------------------------
2446            13  0 14  | 15 16 17  |  0  0
2447     Proc1   0 18  0  | 19 20 21  |  0  0
2448             0  0  0  | 22 23  0  | 24  0
2449     -------------------------------------
2450     Proc2  25 26 27  |  0  0 28  | 29  0
2451            30  0  0  | 31 32 33  |  0 34
2452 .ve
2453 
2454    This can be represented as a collection of submatrices as:
2455 
2456 .vb
2457       A B C
2458       D E F
2459       G H I
2460 .ve
2461 
2462    Where the submatrices A,B,C are owned by proc0, D,E,F are
2463    owned by proc1, G,H,I are owned by proc2.
2464 
2465    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2466    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2467    The 'M','N' parameters are 8,8, and have the same values on all procs.
2468 
2469    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2470    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2471    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2472    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2475 
2476    When d_nz, o_nz parameters are specified, d_nz storage elements are
2477    allocated for every row of the local diagonal submatrix, and o_nz
2478    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   per local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2481    In this case, the values of d_nz,o_nz are:
2482 .vb
2483      proc0 : dnz = 2, o_nz = 2
2484      proc1 : dnz = 3, o_nz = 2
2485      proc2 : dnz = 1, o_nz = 4
2486 .ve
2487    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
2490    34 values.
2491 
2492    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2494    In the above case the values for d_nnz,o_nnz are:
2495 .vb
2496      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2497      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2498      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2499 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
2502 
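   A minimal code sketch of the proc1 settings above (the communicator comm is
   an assumed variable and error checking is omitted for brevity):
.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatCreate(comm,&A);
     MatSetSizes(A,3,3,8,8);                  /* m,n = 3,3 and M,N = 8,8 */
     MatSetType(A,MATMPIAIJ);
     MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve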
2503    Level: intermediate
2504 
2505 .keywords: matrix, aij, compressed row, sparse, parallel
2506 
2507 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2508           MPIAIJ
2509 @*/
2510 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2511 {
2512   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2513 
2514   PetscFunctionBegin;
2515   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2516   if (f) {
2517     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2518   }
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
2524 /*@C
2525      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
2526          CSR format the local rows.
2527 
2528    Collective on MPI_Comm
2529 
2530    Input Parameters:
2531 +  comm - MPI communicator
2532 .  m - number of local rows (Cannot be PETSC_DECIDE)
2533 .  n - This value should be the same as the local size used in creating the
2534        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2535        calculated if N is given) For square matrices n is almost always m.
2536 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2537 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2538 .   i - row indices
2539 .   j - column indices
2540 -   a - matrix values
2541 
2542    Output Parameter:
2543 .   mat - the matrix

   Level: intermediate
2545 
2546    Notes:
2547        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2548      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2549      called this routine.
2550 
2551        The i and j indices are 0 based
2552 
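   Example usage, as a minimal sketch (the 4x4 matrix, its two-process
   partitioning, and the communicator comm are illustrative assumptions); on
   the process owning rows 0 and 1:
.vb
     Mat         mat;
     PetscInt    i[] = {0,2,4};           /* row starts; i[0] must be 0    */
     PetscInt    j[] = {0,1,1,2};         /* 0-based global column indices */
     PetscScalar a[] = {1.0,2.0,3.0,4.0};
     MatCreateMPIAIJWithArrays(comm,2,2,4,4,i,j,a,&mat);
.ve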
2553 .keywords: matrix, aij, compressed row, sparse, parallel
2554 
2555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2556           MPIAIJ, MatCreateMPIAIJ()
2557 @*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2559 {
2560   PetscErrorCode ierr;
2561 
  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); /* set the local and global sizes before the type and preallocation */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 #undef __FUNCT__
2574 #define __FUNCT__ "MatCreateMPIAIJ"
2575 /*@C
2576    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2577    (the default parallel PETSc format).  For good matrix assembly performance
2578    the user should preallocate the matrix storage by setting the parameters
2579    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2580    performance can be increased by more than a factor of 50.
2581 
2582    Collective on MPI_Comm
2583 
2584    Input Parameters:
2585 +  comm - MPI communicator
2586 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2587            This value should be the same as the local size used in creating the
2588            y vector for the matrix-vector product y = Ax.
2589 .  n - This value should be the same as the local size used in creating the
2590        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2591        calculated if N is given) For square matrices n is almost always m.
2592 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2593 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2594 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2595            (same value is used for all local rows)
2596 .  d_nnz - array containing the number of nonzeros in the various rows of the
2597            DIAGONAL portion of the local submatrix (possibly different for each row)
2598            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
2600            You must leave room for the diagonal entry even if it is zero.
2601 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2602            submatrix (same value is used for all local rows).
2603 -  o_nnz - array containing the number of nonzeros in the various rows of the
2604            OFF-DIAGONAL portion of the local submatrix (possibly different for
2605            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2606            structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.
2608 
2609    Output Parameter:
2610 .  A - the matrix
2611 
2612    Notes:
   If the *_nnz parameter is given, then the *_nz parameter is ignored.
2614 
2615    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2616    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2617    storage requirements for this matrix.
2618 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
2621    that argument.
2622 
2623    The user MUST specify either the local or global matrix dimensions
2624    (possibly both).
2625 
2626    The parallel matrix is partitioned such that the first m0 rows belong to
2627    process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm' on each process.
2629 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix obtained by extracting the part corresponding to the
   rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the first
   row and r2 the last row belonging to the processor. This is a square mxm
   matrix. The remaining portion of the local submatrix (of size m x (N-m))
   constitutes the OFF-DIAGONAL portion.
2636 
   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2638 
2639    When calling this routine with a single process communicator, a matrix of
2640    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2641    type of communicator, use the construction mechanism:
     MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2643 
2644    By default, this format uses inodes (identical nodes) when possible.
2645    We search for consecutive rows with the same nonzero structure, thereby
2646    reusing matrix information to achieve increased efficiency.
2647 
2648    Options Database Keys:
2649 +  -mat_no_inode  - Do not use inodes
2650 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2651 -  -mat_aij_oneindex - Internally use indexing starting at 1
2652         rather than 0.  Note that when calling MatSetValues(),
2653         the user still MUST index entries starting at 0!
2654 
2655 
2656    Example usage:
2657 
2658    Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2660    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2661    as follows:
2662 
2663 .vb
2664             1  2  0  |  0  3  0  |  0  4
2665     Proc0   0  5  6  |  7  0  0  |  8  0
2666             9  0 10  | 11  0  0  | 12  0
2667     -------------------------------------
2668            13  0 14  | 15 16 17  |  0  0
2669     Proc1   0 18  0  | 19 20 21  |  0  0
2670             0  0  0  | 22 23  0  | 24  0
2671     -------------------------------------
2672     Proc2  25 26 27  |  0  0 28  | 29  0
2673            30  0  0  | 31 32 33  |  0 34
2674 .ve
2675 
2676    This can be represented as a collection of submatrices as:
2677 
2678 .vb
2679       A B C
2680       D E F
2681       G H I
2682 .ve
2683 
2684    Where the submatrices A,B,C are owned by proc0, D,E,F are
2685    owned by proc1, G,H,I are owned by proc2.
2686 
2687    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2688    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2689    The 'M','N' parameters are 8,8, and have the same values on all procs.
2690 
2691    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2692    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2693    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2694    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2697 
2698    When d_nz, o_nz parameters are specified, d_nz storage elements are
2699    allocated for every row of the local diagonal submatrix, and o_nz
2700    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros
   per local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2703    In this case, the values of d_nz,o_nz are:
2704 .vb
2705      proc0 : dnz = 2, o_nz = 2
2706      proc1 : dnz = 3, o_nz = 2
2707      proc2 : dnz = 1, o_nz = 4
2708 .ve
2709    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
2712    34 values.
2713 
2714    When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2716    In the above case the values for d_nnz,o_nnz are:
2717 .vb
2718      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2719      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2720      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2721 .ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
2724 
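   A minimal code sketch of the proc1 settings above (the communicator comm is
   an assumed variable and error checking is omitted for brevity):
.vb
     Mat      A;
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve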
2725    Level: intermediate
2726 
2727 .keywords: matrix, aij, compressed row, sparse, parallel
2728 
2729 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2730           MPIAIJ, MatCreateMPIAIJWithArrays()
2731 @*/
2732 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2733 {
2734   PetscErrorCode ierr;
2735   PetscMPIInt    size;
2736 
2737   PetscFunctionBegin;
2738   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2739   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2740   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2741   if (size > 1) {
2742     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2743     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2744   } else {
2745     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2746     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2747   }
2748   PetscFunctionReturn(0);
2749 }
2750 
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
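/*
    MatMPIAIJGetSeqAIJ - gives access, without copying, to the diagonal block (Ad),
  the off-diagonal block (Ao), and the map from the compressed local column numbers
  of Ao to global column numbers (colmap) of an MPIAIJ matrix.
*/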
2753 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2754 {
2755   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2756 
2757   PetscFunctionBegin;
2758   *Ad     = a->A;
2759   *Ao     = a->B;
2760   *colmap = a->garray;
2761   PetscFunctionReturn(0);
2762 }

#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion */
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      larray[i] = i + A->cmap.rstart;
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion */
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
  ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
      MatMerge - Creates a single large PETSc matrix by concatenating the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
          The input matrix inmat is destroyed by this routine.

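   A sketch of a typical call, merging one sequential matrix per process into a
   parallel matrix (seqmat is assumed to be an assembled SeqAIJ matrix with the
   same number of columns on every process):
.vb
     Mat outmat;
     ierr = MatMerge(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve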
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for the diagonal and off-diagonal portions of the matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    ierr = MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return an MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    I    = i + rstart;
    ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFileSplit"
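/*
   Writes each process's local rows of A, expanded over all the global columns,
   to that process's own binary viewer file named <outfile>.<rank> (opened in
   append mode).
*/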
PetscErrorCode MatFileSplit(Mat A,char *outfile)
{
  PetscErrorCode    ierr;
  PetscMPIInt       rank;
  PetscInt          m,N,i,rstart,nnz;
  size_t            len;
  const PetscInt    *indx;
  PetscViewer       out;
  char              *name;
  Mat               B;
  const PetscScalar *values;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
  ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
  /* Should this be the type of the diagonal block of A? */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
  for (i=0;i<m;i++) {
    ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
  ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
  ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for the ".<rank>" suffix even for large ranks */
  sprintf(name,"%s.%d",outfile,rank);
  ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
  ierr = PetscFree(name);CHKERRQ(ierr);
  ierr = MatView(B,out);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
  ierr = MatDestroy(B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode       ierr;
  Mat_Merge_SeqsToMPI  *merge;
  PetscObjectContainer container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);

    ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
    /* free merge only when a container was attached; otherwise merge was never set */
    ierr = PetscFree(merge);CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "src/mat/utils/freespace.h"
#include "petscbt.h"
static PetscEvent logkey_seqstompinum = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPINumeric"
/*@C
      MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is kept in the container "MatMergeSeqsToMPI" attached to mpimat, and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access the container.
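
   A sketch of retrieving the supporting container attached to mpimat:
.vb
     PetscObjectContainer container;
     ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
.ve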
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode       ierr;
  MPI_Comm             comm=mpimat->comm;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,taga,*len_s;
  PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
  PetscInt             proc,m;
  PetscInt             **buf_ri,**buf_rj;
  PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request          *s_waits,*r_waits;
  MPI_Status           *status;
  MatScalar            *aa=a->a,**abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI  *merge;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (!logkey_seqstompinum) {
    ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
  if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()");
  ierr   = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
  owners = merge->rowmap.range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;

  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap.n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++){
      if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++){
          if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_seqstompisym = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode       ierr;
  Mat                  B_mpi;
  Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt             len,proc,*dnz,*onz;
  PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status           *status;
  PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI  *merge;
  PetscObjectContainer container;

  PetscFunctionBegin;
  if (!logkey_seqstompisym) {
    ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  merge->rowmap.n  = m;
  merge->rowmap.N  = M;
  merge->rowmap.bs = 1;
  ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap.n;
  M      = merge->rowmap.N;
  owners = merge->rowmap.range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len = 0;  /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++){
    len_si[proc] = 0;
    if (proc == rank){
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++){
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
  sj_waits = si_waits + merge->nsend;

  for (proc=0, k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    i = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++){
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++){
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++){
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree(si_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
  nextrow = buf_ri_k + merge->nrecv;
  nextai  = nextrow + merge->nrecv;
  for (k=0; k<merge->nrecv; k++){
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len = 0;
  for (i=0;i<m;i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++){ /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);

  ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = PETSC_NULL;
  merge->coj          = PETSC_NULL;
  merge->owners_co    = PETSC_NULL;

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_seqstompi = 0;
#undef __FUNCT__
#define __FUNCT__ "MatMerge_SeqsToMPI"
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!logkey_seqstompi) {
    ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscEvent logkey_getlocalmat = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMat"
/*@C
     MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of a parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

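   A sketch of typical usage, creating the local matrix once and refreshing its
   values after A changes:
.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... the values of A change ... */
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
.ve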
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
  PetscScalar     *aa=a->a,*ba=b->a,*ca;
  PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
  PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;

  PetscFunctionBegin;
  if (!logkey_getlocalmat) {
    ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++){
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
    k = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A, columns left of the diagonal block */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->freedata = PETSC_TRUE;
    mat->nonew    = 0;
  } else if (scall == MAT_REUSE_MATRIX){
    mat = (Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; ca = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A, columns left of the diagonal block */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *ca++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *ca++ = *aa++;
      /* off-diagonal portion of A, columns right of the diagonal block */
      for (j=jo; j<ncols_o; j++) {
        *ca++ = *ba++; bj++;
      }
    }
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_getlocalmatcondensed = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetLocalMatCondensed"
/*@C
     MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows and the nonzero columns of a parallel matrix

    Not Collective

   Input Parameters:
+    A - the matrix
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    row, col - index sets of rows and columns to extract (or PETSC_NULL)

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

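   A sketch of a call that lets the routine build the row and column index sets
   itself:
.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve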
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
{
  Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
  IS                isrowa,iscola;
  Mat               *aloc;

  PetscFunctionBegin;
  if (!logkey_getlocalmatcondensed) {
    ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  if (!row){
    start = A->rmap.rstart; end = A->rmap.rend;
    ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
  } else {
    isrowa = *row;
  }
  if (!col){
    start = A->cmap.rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap.n;
    nzB   = a->B->cmap.n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX){
    ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
    aloc[0] = *A_loc;
  }
  ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
  *A_loc = aloc[0];
  ierr = PetscFree(aloc);CHKERRQ(ierr);
  if (!row){
    ierr = ISDestroy(isrowa);CHKERRQ(ierr);
  }
  if (!col){
    ierr = ISDestroy(iscola);CHKERRQ(ierr);
  }
  ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_GetBrowsOfAcols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAcols"
/*@C
    MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)

   Output Parameters:
+    rowb, colb - index sets of the rows and columns of B that were extracted
.    brstart - index within B_seq of the first of B's own local rows (the next B->rmap.n rows of B_seq are B's local rows)
-    B_seq - the sequential matrix generated

    Level: developer

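   A sketch of the MAT_INITIAL_MATRIX call, letting the routine create the
   index sets and return them for later reuse:
.vb
     Mat      B_seq;
     IS       rowb,colb;
     PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve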
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
{
  Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS                isrowb,iscolb;
  Mat               *bseq;

  PetscFunctionBegin;
  if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
  }
  if (!logkey_GetBrowsOfAcols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    start = A->cmap.rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap.n;
    nzB   = a->B->cmap.n;
    ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt),&idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
    ierr = PetscFree(idx);CHKERRQ(ierr);
    *brstart = imark;
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb; iscolb = *colb;
    ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb){
    ierr = ISDestroy(isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb){
    ierr = ISDestroy(iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscEvent logkey_GetBrowsOfAocols = 0;
#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols"
/*@C
    MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
.    startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
-    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)

   Output Parameter:
.    B_oth - the sequential matrix generated

    Level: developer

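   A sketch of a pair of calls, saving the communication buffers from the first
   call so they can be reused in the second:
.vb
     Mat         B_oth;
     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     /* ... the values of B change ... */
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
.ve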
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx=a->Mvctx;
  MPI_Comm               comm=ctx->comm;
  PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
  PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits,*swaits;
  MPI_Status             *sstatus,rstatus;
  PetscInt               *cols;
  PetscScalar            *vals;
  PetscMPIInt            j;

  PetscFunctionBegin;
  if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
    SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
  }
  if (!logkey_GetBrowsOfAocols) {
    ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the lengths of the received rows */
  svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;   /* local row index to be sent */
  rstarts = gen_from->starts;
  sstarts = gen_to->starts;
  rprocs  = gen_from->procs;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;

  if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX){
    /* i-array */
    /*---------*/
    /*  post receives */
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
    rstartsj    = sstartsj + nsends + 1;
    sstartsj[0] = 0;  rstartsj[0] = 0;
    len = 0; /* total length of j or a array to be sent */
    k = 0;
    for (i=0; i<nsends; i++){
      rowlen = (PetscInt*)svalues + sstarts[i];
      nrows  = sstarts[i+1]-sstarts[i]; /* num of rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap.range[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* row length */
        len += rowlen[j];
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
    b_othi[0] = 0;
    len = 0; /* total length of j or a array to be received */
    k = 0;
    for (i=0; i<nrecvs; i++){
      rowlen = (PetscInt*)rvalues + rstarts[i];
      nrows  = rstarts[i+1]-rstarts[i];
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /*  post receives of j-array */
    for (i=0; i<nrecvs; i++){
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }
    k = 0;
    for (i=0; i<nsends; i++){
      nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
        ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
        for (l=0; l<ncols; l++){
          *bufJ++ = cols[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX){
    sstartsj = *startsj;
    rstartsj = sstartsj + nsends + 1;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* a-array */
  /*---------*/
  /*  post receives of a-array */
  for (i=0; i<nrecvs; i++){
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }
  k = 0;
  for (i=0; i<nsends; i++){
    nrows = sstarts[i+1]-sstarts[i];
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
      ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
      for (l=0; l<ncols; l++){
        *bufA++ = vals[l];
      }
      ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX){
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->freedata = PETSC_TRUE;
    b_oth->nonew    = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj || !bufa_ptr){
      ierr = PetscFree(sstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
    } else {
      *startsj  = sstartsj;
      *bufa_ptr = bufa;
    }
  }
  ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in mpiaij format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

  Level: developer

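  A sketch of looking up these structures (note that when PETSC_USE_CTABLE is
  defined the colmap argument is a PetscTable* rather than a PetscInt**):
.vb
    Vec        lvec;
    PetscInt   *colmap;
    VecScatter ctx;
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&ctx);CHKERRQ(ierr);
.ve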
@*/
#if defined (PETSC_USE_CTABLE)
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ *) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJ()
M*/

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);

  ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
  B->data         = (void*)b;
  ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->factor       = 0;
  B->rmap.bs      = 1;
  B->assembled    = PETSC_FALSE;
  B->mapping      = 0;

  B->insertmode = NOT_SET_VALUES;
  b->size       = size;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = PETSC_NULL;
  b->Mvctx = PETSC_NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIAIJ",
                                     MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIAIJ",
                                     MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIAIJ",
                                     MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
                                     "MatIsTranspose_MPIAIJ",
                                     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
                                     "MatMPIAIJSetPreallocation_MPIAIJ",
                                     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
                                     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
                                     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIAIJ",
                                     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
    Special version for direct calls from Fortran
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ2
#define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
#undef SETERRQ
#define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv)
{
  Mat            mat = *mmat;
  PetscInt       m = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatPreallocated(mat);
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  }
#endif
  {
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macro */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba    = b->a; /* refresh ba first: DisAssemble_MPIAIJ() replaced aij->B, so the old ba is stale */
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }}
  PetscFunctionReturnVoid();
}
EXTERN_C_END