/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision fc42d0c83e6342acd52b3f136eb9633dcab6cf92) */
#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
at a slightly higher hash-table lookup cost; without it, it is not scalable
(each process holds an order-N integer array), but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap.n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
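
/*
   Illustrative sketch (not part of the PETSc API): how the colmap built above
   is typically consulted.  Entries are stored shifted by one so that a lookup
   result of 0 means "this global column has no local counterpart in B"; after
   the -1 correction, absent columns come back as -1.  The helper name and its
   surroundings here are hypothetical.

     PetscInt LookupLocalCol(Mat_MPIAIJ *aij,PetscInt gcol)
     {
       PetscInt lcol;
     #if defined (PETSC_USE_CTABLE)
       PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;   // -1 if absent
     #else
       lcol = aij->colmap[gcol] - 1;                       // -1 if absent
     #endif
       return lcol;
     }
*/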


#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
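
/*
   A minimal sketch of the pattern the two macros above implement, written as
   a plain function for readability (hypothetical helper, not part of PETSc).
   Each local row is kept sorted by column; a bisection narrows the search
   window to at most 5 entries, a linear scan finds the column, and on a miss
   the tail of the row is shifted up one slot to make room.

     static void InsertSorted(PetscInt *cols,PetscScalar *vals,PetscInt *len,
                              PetscInt col,PetscScalar value,InsertMode addv)
     {
       PetscInt lo = 0,hi = *len,t,i;
       while (hi-lo > 5) { t = (lo+hi)/2; if (cols[t] > col) hi = t; else lo = t; }
       for (i=lo; i<hi; i++) {
         if (cols[i] > col) break;
         if (cols[i] == col) {                 // hit: combine and return
           if (addv == ADD_VALUES) vals[i] += value; else vals[i] = value;
           return;
         }
       }
       for (t=*len-1; t>=i; t--) { cols[t+1] = cols[t]; vals[t+1] = vals[t]; }
       cols[i] = col; vals[i] = value; (*len)++;  // assumes space was reallocated
     }
*/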

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
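
/*
   The routine above assumes v[] holds one full local row in global column
   order, so it is split as [B part left of the diagonal block | A (diagonal)
   part | B part right of the diagonal block].  A hedged usage sketch; ROWLEN
   is a hypothetical placeholder for the number of stored entries in the row:

     PetscInt    row = rstart;          // a global row this process owns
     PetscScalar vals[ROWLEN];          // all nonzeros of that row, by global column
     MatSetValuesRow_MPIAIJ(A,row,vals);
*/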

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                 ba must be refreshed before rp2/ap2 are recomputed from it */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap.n;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
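
/*
   Typical user-level call sequence that ends up in the routine above (a
   hedged sketch; error checking omitted).  Locally owned entries go straight
   into the A or B blocks; off-process entries are stashed and only
   communicated during assembly:

     PetscInt    row = 0,cols[2] = {0,1};
     PetscScalar vals[2] = {2.0,-1.0};
     MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/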


#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT mode or ADD mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* re-access b because aij->B may have been changed in MatSetValues() or DisAssemble() */
  b = (Mat_SeqAIJ *)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}
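
/*
   The Begin/End pair above is split so that communication of stashed
   off-process entries can overlap with user computation.  A hedged usage
   sketch:

     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     // ... unrelated local work may be done here while stash messages move ...
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
     // mat is now ready for MatMult(), MatNorm(), etc.
*/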

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     allocating new memory, thus calling lots of mallocs and slowing
     things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
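
/*
   Hedged usage sketch for the routine above: each process passes the global
   row numbers it wants zeroed; they need not be locally owned, since the
   sends/receives coded above route every row to its owner.

     PetscInt rows[2] = {0,5};            // global row indices
     MatZeroRows(A,2,rows,1.0);           // zero the rows, put 1.0 on the diagonal
*/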

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
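
/*
   The scatter/multiply interleaving above is the standard latency-hiding
   pattern: the diagonal block A is applied while the ghost values needed by
   the off-diagonal block B are in flight.  Schematically,

     y = A_diag * x_local  +  B_offdiag * x_ghost

   where x_ghost (a->lvec) arrives between VecScatterBegin() and
   VecScatterEnd().
*/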

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin(), but we still need to call this no-op End() */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
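
/*
   Hedged usage sketch: the diagonal lands in a vector distributed the same
   way as the matrix rows (the square, matching row/column layout checked
   above is required).  MatGetVecs() availability depends on the PETSc
   version; any vector with a matching layout works.

     Vec d;
     MatGetVecs(A,PETSC_NULL,&d);   // or VecCreateMPI with matching layout
     MatGetDiagonal(A,d);
*/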

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->rmap.N;
    header[2] = mat->cmap.N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap.n;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->rmap.n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap.n; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th process needs as much space as the largest process needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap.n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
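
/*
   Hedged usage sketch: these local relaxation variants are what PCSOR uses
   in parallel, e.g. from the command line

     -pc_type sor -pc_sor_local_symmetric   (the SOR_LOCAL_SYMMETRIC_SWEEP case)

   or directly (one outer iteration, one local iteration):

     MatRelax(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,
              0.0,1,1,x);
*/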

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  PetscMPIInt    ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->rmap.N;
  info->columns_global    = (double)matin->cmap.N;
  info->rows_local        = (double)matin->rmap.n;
  info->columns_local     = (double)matin->cmap.n;

  PetscFunctionReturn(0);
}
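
/*
   Hedged usage sketch: aggregate nonzero/memory statistics across the
   communicator with MAT_GLOBAL_SUM (or take per-process maxima with
   MAT_GLOBAL_MAX):

     MatInfo info;
     MatGetInfo(A,MAT_GLOBAL_SUM,&info);
     PetscPrintf(A->comm,"nonzeros used %g allocated %g\n",
                 info.nz_used,info.nz_allocated);
*/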

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
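
/*
   Hedged usage sketch: rows must be locally owned, and every MatGetRow()
   must be paired with MatRestoreRow() before the next row is requested
   (the routines above share a single work buffer per matrix):

     PetscInt    ncols;
     PetscInt    *cols;
     PetscScalar *vals;
     MatGetRow(A,row,&ncols,&cols,&vals);
     // ... inspect cols[0..ncols-1], vals[0..ncols-1] ...
     MatRestoreRow(A,row,&ncols,&cols,&vals);
*/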

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatNorm_MPIAIJ"
1355 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1356 {
1357   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1358   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1359   PetscErrorCode ierr;
1360   PetscInt       i,j,cstart = mat->cmap.rstart;
1361   PetscReal      sum = 0.0;
1362   PetscScalar    *v;
1363 
1364   PetscFunctionBegin;
1365   if (aij->size == 1) {
1366     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1367   } else {
1368     if (type == NORM_FROBENIUS) {
1369       v = amat->a;
1370       for (i=0; i<amat->nz; i++) {
1371 #if defined(PETSC_USE_COMPLEX)
1372         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1373 #else
1374         sum += (*v)*(*v); v++;
1375 #endif
1376       }
1377       v = bmat->a;
1378       for (i=0; i<bmat->nz; i++) {
1379 #if defined(PETSC_USE_COMPLEX)
1380         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1381 #else
1382         sum += (*v)*(*v); v++;
1383 #endif
1384       }
1385       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1386       *norm = sqrt(*norm);
1387     } else if (type == NORM_1) { /* max column norm */
1388       PetscReal *tmp,*tmp2;
1389       PetscInt    *jj,*garray = aij->garray;
1390       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1391       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1392       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1393       *norm = 0.0;
1394       v = amat->a; jj = amat->j;
1395       for (j=0; j<amat->nz; j++) {
1396         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1397       }
1398       v = bmat->a; jj = bmat->j;
1399       for (j=0; j<bmat->nz; j++) {
1400         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1401       }
1402       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1403       for (j=0; j<mat->cmap.N; j++) {
1404         if (tmp2[j] > *norm) *norm = tmp2[j];
1405       }
1406       ierr = PetscFree(tmp);CHKERRQ(ierr);
1407       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1408     } else if (type == NORM_INFINITY) { /* max row norm */
1409       PetscReal ntemp = 0.0;
1410       for (j=0; j<aij->A->rmap.n; j++) {
1411         v = amat->a + amat->i[j];
1412         sum = 0.0;
1413         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1414           sum += PetscAbsScalar(*v); v++;
1415         }
1416         v = bmat->a + bmat->i[j];
1417         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1418           sum += PetscAbsScalar(*v); v++;
1419         }
1420         if (sum > ntemp) ntemp = sum;
1421       }
1422       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1423     } else {
1424       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1425     }
1426   }
1427   PetscFunctionReturn(0);
1428 }
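
/*
   Usage sketch (illustrative; not part of the library). Only NORM_1 (largest
   column sum of absolute values), NORM_FROBENIUS and NORM_INFINITY (largest
   row sum of absolute values) are supported here; NORM_2 is refused above:

      PetscReal nrm;
      ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/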
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatTranspose_MPIAIJ"
1432 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1433 {
1434   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1435   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1436   PetscErrorCode ierr;
1437   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1438   Mat            B;
1439   PetscScalar    *array;
1440 
1441   PetscFunctionBegin;
1442   if (!matout && M != N) {
1443     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1444   }
1445 
1446   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1447   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1448   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1449   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1450 
1451   /* copy over the A part */
1452   Aloc = (Mat_SeqAIJ*)a->A->data;
1453   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1454   row = A->rmap.rstart;
1455   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart;}
1456   for (i=0; i<m; i++) {
1457     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1458     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1459   }
1460   aj = Aloc->j;
1461   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart;}
1462 
1463   /* copy over the B part */
1464   Aloc = (Mat_SeqAIJ*)a->B->data;
1465   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1466   row  = A->rmap.rstart;
1467   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1468   ct   = cols;
1469   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1470   for (i=0; i<m; i++) {
1471     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1472     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1473   }
1474   ierr = PetscFree(ct);CHKERRQ(ierr);
1475   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1476   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1477   if (matout) {
1478     *matout = B;
1479   } else {
1480     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1487 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1488 {
1489   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1490   Mat            a = aij->A,b = aij->B;
1491   PetscErrorCode ierr;
1492   PetscInt       s1,s2,s3;
1493 
1494   PetscFunctionBegin;
1495   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1496   if (rr) {
1497     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1498     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1499     /* Overlap communication with computation. */
1500     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1501   }
1502   if (ll) {
1503     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1504     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1505     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1506   }
1507   /* scale the diagonal block */
1508   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1509 
1510   if (rr) {
1511     /* Do a scatter end and then right scale the off-diagonal block */
1512     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1513     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1514   }
1515 
1516   PetscFunctionReturn(0);
1517 }
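
/*
   Usage sketch (illustrative; not part of the library). MatDiagonalScale()
   computes mat = diag(ll)*mat*diag(rr); pass PETSC_NULL for a side that is
   to be left unscaled, e.g. left scaling only:

      ierr = MatDiagonalScale(A,l,PETSC_NULL);CHKERRQ(ierr);
*/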
1518 
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1522 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1523 {
1524   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   if (!a->rank) {
1529     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1530   }
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1536 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1537 {
1538   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1539   PetscErrorCode ierr;
1540 
1541   PetscFunctionBegin;
1542   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1543   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1544   PetscFunctionReturn(0);
1545 }
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1548 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1549 {
1550   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatEqual_MPIAIJ"
1560 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1561 {
1562   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1563   Mat            a,b,c,d;
1564   PetscTruth     flg;
1565   PetscErrorCode ierr;
1566 
1567   PetscFunctionBegin;
1568   a = matA->A; b = matA->B;
1569   c = matB->A; d = matB->B;
1570 
1571   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1572   if (flg) {
1573     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1574   }
1575   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatCopy_MPIAIJ"
1581 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1582 {
1583   PetscErrorCode ierr;
1584   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1585   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1586 
1587   PetscFunctionBegin;
1588   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1589   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1590     /* because of the column compression in the off-processor part of the matrix a->B,
1591        the number of columns of a->B and b->B may differ, hence we cannot call
1592        MatCopy() directly on the two parts. If need be, a copy more efficient
1593        than MatCopy_Basic() could be provided by first uncompressing the a->B
1594        matrices and then copying the submatrices */
1595     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1596   } else {
1597     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1598     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1599   }
1600   PetscFunctionReturn(0);
1601 }
1602 
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1605 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1606 {
1607   PetscErrorCode ierr;
1608 
1609   PetscFunctionBegin;
1610   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 #include "petscblaslapack.h"
1615 #undef __FUNCT__
1616 #define __FUNCT__ "MatAXPY_MPIAIJ"
1617 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1618 {
1619   PetscErrorCode ierr;
1620   PetscInt       i;
1621   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1622   PetscBLASInt   bnz,one=1;
1623   Mat_SeqAIJ     *x,*y;
1624 
1625   PetscFunctionBegin;
1626   if (str == SAME_NONZERO_PATTERN) {
1627     PetscScalar alpha = a;
1628     x = (Mat_SeqAIJ *)xx->A->data;
1629     y = (Mat_SeqAIJ *)yy->A->data;
1630     bnz = (PetscBLASInt)x->nz;
1631     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1632     x = (Mat_SeqAIJ *)xx->B->data;
1633     y = (Mat_SeqAIJ *)yy->B->data;
1634     bnz = (PetscBLASInt)x->nz;
1635     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1636   } else if (str == SUBSET_NONZERO_PATTERN) {
1637     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1638 
1639     x = (Mat_SeqAIJ *)xx->B->data;
1640     y = (Mat_SeqAIJ *)yy->B->data;
1641     if (y->xtoy && y->XtoY != xx->B) {
1642       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1643       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1644     }
1645     if (!y->xtoy) { /* get xtoy */
1646       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1647       y->XtoY = xx->B;
1648     }
1649     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1650   } else {
1651     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1652   }
1653   PetscFunctionReturn(0);
1654 }
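
/*
   Usage sketch (illustrative; not part of the library). MatAXPY() computes
   Y = a*X + Y; when X and Y share a nonzero pattern the fast BLAS path
   above applies:

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/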
1655 
1656 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1657 
1658 #undef __FUNCT__
1659 #define __FUNCT__ "MatConjugate_MPIAIJ"
1660 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1661 {
1662 #if defined(PETSC_USE_COMPLEX)
1663   PetscErrorCode ierr;
1664   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1668   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1669 #else
1670   PetscFunctionBegin;
1671 #endif
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatRealPart_MPIAIJ"
1677 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1678 {
1679   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1680   PetscErrorCode ierr;
1681 
1682   PetscFunctionBegin;
1683   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1684   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1690 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1691 {
1692   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1693   PetscErrorCode ierr;
1694 
1695   PetscFunctionBegin;
1696   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1697   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 /* -------------------------------------------------------------------*/
1702 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1703        MatGetRow_MPIAIJ,
1704        MatRestoreRow_MPIAIJ,
1705        MatMult_MPIAIJ,
1706 /* 4*/ MatMultAdd_MPIAIJ,
1707        MatMultTranspose_MPIAIJ,
1708        MatMultTransposeAdd_MPIAIJ,
1709        0,
1710        0,
1711        0,
1712 /*10*/ 0,
1713        0,
1714        0,
1715        MatRelax_MPIAIJ,
1716        MatTranspose_MPIAIJ,
1717 /*15*/ MatGetInfo_MPIAIJ,
1718        MatEqual_MPIAIJ,
1719        MatGetDiagonal_MPIAIJ,
1720        MatDiagonalScale_MPIAIJ,
1721        MatNorm_MPIAIJ,
1722 /*20*/ MatAssemblyBegin_MPIAIJ,
1723        MatAssemblyEnd_MPIAIJ,
1724        0,
1725        MatSetOption_MPIAIJ,
1726        MatZeroEntries_MPIAIJ,
1727 /*25*/ MatZeroRows_MPIAIJ,
1728        0,
1729        0,
1730        0,
1731        0,
1732 /*30*/ MatSetUpPreallocation_MPIAIJ,
1733        0,
1734        0,
1735        0,
1736        0,
1737 /*35*/ MatDuplicate_MPIAIJ,
1738        0,
1739        0,
1740        0,
1741        0,
1742 /*40*/ MatAXPY_MPIAIJ,
1743        MatGetSubMatrices_MPIAIJ,
1744        MatIncreaseOverlap_MPIAIJ,
1745        MatGetValues_MPIAIJ,
1746        MatCopy_MPIAIJ,
1747 /*45*/ MatPrintHelp_MPIAIJ,
1748        MatScale_MPIAIJ,
1749        0,
1750        0,
1751        0,
1752 /*50*/ MatSetBlockSize_MPIAIJ,
1753        0,
1754        0,
1755        0,
1756        0,
1757 /*55*/ MatFDColoringCreate_MPIAIJ,
1758        0,
1759        MatSetUnfactored_MPIAIJ,
1760        MatPermute_MPIAIJ,
1761        0,
1762 /*60*/ MatGetSubMatrix_MPIAIJ,
1763        MatDestroy_MPIAIJ,
1764        MatView_MPIAIJ,
1765        0,
1766        0,
1767 /*65*/ 0,
1768        0,
1769        0,
1770        0,
1771        0,
1772 /*70*/ 0,
1773        0,
1774        MatSetColoring_MPIAIJ,
1775 #if defined(PETSC_HAVE_ADIC)
1776        MatSetValuesAdic_MPIAIJ,
1777 #else
1778        0,
1779 #endif
1780        MatSetValuesAdifor_MPIAIJ,
1781 /*75*/ 0,
1782        0,
1783        0,
1784        0,
1785        0,
1786 /*80*/ 0,
1787        0,
1788        0,
1789        0,
1790 /*84*/ MatLoad_MPIAIJ,
1791        0,
1792        0,
1793        0,
1794        0,
1795        0,
1796 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1797        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1798        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1799        MatPtAP_Basic,
1800        MatPtAPSymbolic_MPIAIJ,
1801 /*95*/ MatPtAPNumeric_MPIAIJ,
1802        0,
1803        0,
1804        0,
1805        0,
1806 /*100*/0,
1807        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1808        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1809        MatConjugate_MPIAIJ,
1810        0,
1811 /*105*/MatSetValuesRow_MPIAIJ,
1812        MatRealPart_MPIAIJ,
1813        MatImaginaryPart_MPIAIJ};
1814 
1815 /* ----------------------------------------------------------------------------------------*/
1816 
1817 EXTERN_C_BEGIN
1818 #undef __FUNCT__
1819 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1820 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1821 {
1822   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1823   PetscErrorCode ierr;
1824 
1825   PetscFunctionBegin;
1826   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1827   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1828   PetscFunctionReturn(0);
1829 }
1830 EXTERN_C_END
1831 
1832 EXTERN_C_BEGIN
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1835 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1836 {
1837   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1838   PetscErrorCode ierr;
1839 
1840   PetscFunctionBegin;
1841   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1842   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 EXTERN_C_END
1846 
1847 #include "petscpc.h"
1848 EXTERN_C_BEGIN
1849 #undef __FUNCT__
1850 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1851 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1852 {
1853   Mat_MPIAIJ     *b;
1854   PetscErrorCode ierr;
1855   PetscInt       i;
1856 
1857   PetscFunctionBegin;
1858   B->preallocated = PETSC_TRUE;
1859   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1860   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1861   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1862   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1863 
1864   B->rmap.bs = B->cmap.bs = 1;
1865   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1866   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1867   if (d_nnz) {
1868     for (i=0; i<B->rmap.n; i++) {
1869       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1870     }
1871   }
1872   if (o_nnz) {
1873     for (i=0; i<B->rmap.n; i++) {
1874       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1875     }
1876   }
1877   b = (Mat_MPIAIJ*)B->data;
1878 
1879   /* Explicitly create 2 MATSEQAIJ matrices. */
1880   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1881   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1882   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1883   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1884   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1885   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1886   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1887   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1888 
1889   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1890   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1891 
1892   PetscFunctionReturn(0);
1893 }
1894 EXTERN_C_END
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1898 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1899 {
1900   Mat            mat;
1901   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   *newmat       = 0;
1906   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1907   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
1908   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1909   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1910   a    = (Mat_MPIAIJ*)mat->data;
1911 
1912   mat->factor       = matin->factor;
1913   mat->rmap.bs      = matin->rmap.bs;
1914   mat->assembled    = PETSC_TRUE;
1915   mat->insertmode   = NOT_SET_VALUES;
1916   mat->preallocated = PETSC_TRUE;
1917 
1918   a->size           = oldmat->size;
1919   a->rank           = oldmat->rank;
1920   a->donotstash     = oldmat->donotstash;
1921   a->roworiented    = oldmat->roworiented;
1922   a->rowindices     = 0;
1923   a->rowvalues      = 0;
1924   a->getrowactive   = PETSC_FALSE;
1925 
1926   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
1927   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
1928 
1929   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1930   if (oldmat->colmap) {
1931 #if defined (PETSC_USE_CTABLE)
1932     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1933 #else
1934     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1935     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1936     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1937 #endif
1938   } else a->colmap = 0;
1939   if (oldmat->garray) {
1940     PetscInt len;
1941     len  = oldmat->B->cmap.n;
1942     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1943     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1944     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1945   } else a->garray = 0;
1946 
1947   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1948   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1949   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1950   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1951   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1952   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1953   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1954   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1955   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1956   *newmat = mat;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #include "petscsys.h"
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatLoad_MPIAIJ"
1964 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1965 {
1966   Mat            A;
1967   PetscScalar    *vals,*svals;
1968   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1969   MPI_Status     status;
1970   PetscErrorCode ierr;
1971   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1972   PetscInt       i,nz,j,rstart,rend,mmax;
1973   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1974   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1975   PetscInt       cend,cstart,n,*rowners;
1976   int            fd;
1977 
1978   PetscFunctionBegin;
1979   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1980   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1981   if (!rank) {
1982     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1983     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1984     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1985   }
1986 
1987   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1988   M = header[1]; N = header[2];
1989   /* determine ownership of all rows */
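  /* e.g. M=10 on 3 processes gives local sizes m = 4,3,3 on ranks 0,1,2 */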
1990   m    = M/size + ((M % size) > rank);
1991   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1992   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1993 
1994   /* First process needs enough room for process with most rows */
1995   if (!rank) {
1996     mmax       = rowners[1];
1997     for (i=2; i<size; i++) {
1998       mmax = PetscMax(mmax,rowners[i]);
1999     }
2000   } else mmax = m;
2001 
2002   rowners[0] = 0;
2003   for (i=2; i<=size; i++) {
2004     mmax       = PetscMax(mmax,rowners[i]);
2005     rowners[i] += rowners[i-1];
2006   }
2007   rstart = rowners[rank];
2008   rend   = rowners[rank+1];
2009 
2010   /* distribute row lengths to all processors */
2011   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2012   if (!rank) {
2013     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2014     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2015     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2016     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2017     for (j=0; j<m; j++) {
2018       procsnz[0] += ourlens[j];
2019     }
2020     for (i=1; i<size; i++) {
2021       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2022       /* calculate the number of nonzeros on each processor */
2023       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2024         procsnz[i] += rowlengths[j];
2025       }
2026       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2027     }
2028     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2029   } else {
2030     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2031   }
2032 
2033   if (!rank) {
2034     /* determine max buffer needed and allocate it */
2035     maxnz = 0;
2036     for (i=0; i<size; i++) {
2037       maxnz = PetscMax(maxnz,procsnz[i]);
2038     }
2039     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2040 
2041     /* read in my part of the matrix column indices  */
2042     nz   = procsnz[0];
2043     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2044     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2045 
2046     /* read in everyone else's parts and ship them off */
2047     for (i=1; i<size; i++) {
2048       nz   = procsnz[i];
2049       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2050       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2051     }
2052     ierr = PetscFree(cols);CHKERRQ(ierr);
2053   } else {
2054     /* determine buffer space needed for message */
2055     nz = 0;
2056     for (i=0; i<m; i++) {
2057       nz += ourlens[i];
2058     }
2059     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2060 
2061     /* receive the message of column indices */
2062     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2063     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2064     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2065   }
2066 
2067   /* determine column ownership if matrix is not square */
2068   if (N != M) {
2069     n      = N/size + ((N % size) > rank);
2070     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2071     cstart = cend - n;
2072   } else {
2073     cstart = rstart;
2074     cend   = rend;
2075     n      = cend - cstart;
2076   }
2077 
2078   /* loop over local rows, determining the number of off-diagonal entries */
2079   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2080   jj = 0;
2081   for (i=0; i<m; i++) {
2082     for (j=0; j<ourlens[i]; j++) {
2083       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2084       jj++;
2085     }
2086   }
2087 
2088   /* create our matrix */
2089   for (i=0; i<m; i++) {
2090     ourlens[i] -= offlens[i];
2091   }
2092   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2093   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2094   ierr = MatSetType(A,type);CHKERRQ(ierr);
2095   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2096 
2097   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2098   for (i=0; i<m; i++) {
2099     ourlens[i] += offlens[i];
2100   }
2101 
2102   if (!rank) {
2103     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2104 
2105     /* read in my part of the matrix numerical values  */
2106     nz   = procsnz[0];
2107     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2108 
2109     /* insert into matrix */
2110     jj      = rstart;
2111     smycols = mycols;
2112     svals   = vals;
2113     for (i=0; i<m; i++) {
2114       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2115       smycols += ourlens[i];
2116       svals   += ourlens[i];
2117       jj++;
2118     }
2119 
2120     /* read in the other processors' parts and ship them out */
2121     for (i=1; i<size; i++) {
2122       nz   = procsnz[i];
2123       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2124       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2125     }
2126     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2127   } else {
2128     /* receive numeric values */
2129     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2130 
2131     /* receive the message of values */
2132     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2133     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2134     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2135 
2136     /* insert into matrix */
2137     jj      = rstart;
2138     smycols = mycols;
2139     svals   = vals;
2140     for (i=0; i<m; i++) {
2141       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2142       smycols += ourlens[i];
2143       svals   += ourlens[i];
2144       jj++;
2145     }
2146   }
2147   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2148   ierr = PetscFree(vals);CHKERRQ(ierr);
2149   ierr = PetscFree(mycols);CHKERRQ(ierr);
2150   ierr = PetscFree(rowners);CHKERRQ(ierr);
2151 
2152   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2153   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2154   *newmat = A;
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2160 /*
2161     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
2162   on each process, and then the end result formed by concatenating the local matrices.
2163   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2164 */
2165 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2166 {
2167   PetscErrorCode ierr;
2168   PetscMPIInt    rank,size;
2169   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2170   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2171   Mat            *local,M,Mreuse;
2172   PetscScalar    *vwork,*aa;
2173   MPI_Comm       comm = mat->comm;
2174   Mat_SeqAIJ     *aij;
2175 
2176 
2177   PetscFunctionBegin;
2178   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2180 
2181   if (call ==  MAT_REUSE_MATRIX) {
2182     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2183     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2184     local = &Mreuse;
2185     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2186   } else {
2187     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2188     Mreuse = *local;
2189     ierr   = PetscFree(local);CHKERRQ(ierr);
2190   }
2191 
2192   /*
2193       m - number of local rows
2194       n - number of columns (same on all processors)
2195       rstart - first row in new global matrix generated
2196   */
2197   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2198   if (call == MAT_INITIAL_MATRIX) {
2199     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2200     ii  = aij->i;
2201     jj  = aij->j;
2202 
2203     /*
2204         Determine the number of non-zeros in the diagonal and off-diagonal
2205         portions of the matrix in order to do correct preallocation
2206     */
2207 
2208     /* first get start and end of "diagonal" columns */
2209     if (csize == PETSC_DECIDE) {
2210       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2211       if (mglobal == n) { /* square matrix */
2212         nlocal = m;
2213       } else {
2214         nlocal = n/size + ((n % size) > rank);
2215       }
2216     } else {
2217       nlocal = csize;
2218     }
2219     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2220     rstart = rend - nlocal;
2221     if (rank == size - 1 && rend != n) {
2222       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2223     }
2224 
2225     /* next, compute all the lengths */
2226     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2227     olens = dlens + m;
2228     for (i=0; i<m; i++) {
2229       jend = ii[i+1] - ii[i];
2230       olen = 0;
2231       dlen = 0;
2232       for (j=0; j<jend; j++) {
2233         if (*jj < rstart || *jj >= rend) olen++;
2234         else dlen++;
2235         jj++;
2236       }
2237       olens[i] = olen;
2238       dlens[i] = dlen;
2239     }
2240     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2241     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2242     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2243     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2244     ierr = PetscFree(dlens);CHKERRQ(ierr);
2245   } else {
2246     PetscInt ml,nl;
2247 
2248     M = *newmat;
2249     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2250     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2251     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2252     /*
2253          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2254        rather than the slower MatSetValues().
2255     */
2256     M->was_assembled = PETSC_TRUE;
2257     M->assembled     = PETSC_FALSE;
2258   }
2259   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2260   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2261   ii  = aij->i;
2262   jj  = aij->j;
2263   aa  = aij->a;
2264   for (i=0; i<m; i++) {
2265     row   = rstart + i;
2266     nz    = ii[i+1] - ii[i];
2267     cwork = jj;     jj += nz;
2268     vwork = aa;     aa += nz;
2269     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2270   }
2271 
2272   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2273   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2274   *newmat = M;
2275 
2276   /* save submatrix used in processor for next request */
2277   if (call ==  MAT_INITIAL_MATRIX) {
2278     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2279     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2280   }
2281 
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 EXTERN_C_BEGIN
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2288 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2289 {
2290   PetscInt       m,cstart, cend,j,nnz,i,d;
2291   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2292   const PetscInt *JJ;
2293   PetscScalar    *values;
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0, it is %D",I[0]);
2298 
2299   B->rmap.bs = B->cmap.bs = 1;
2300   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2301   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2302   m      = B->rmap.n;
2303   cstart = B->cmap.rstart;
2304   cend   = B->cmap.rend;
2305   rstart = B->rmap.rstart;
2306 
2307   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2308   o_nnz = d_nnz + m;
2309 
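  /* count the diagonal-block entries (global columns in [cstart,cend)) and the
     off-diagonal entries of each local row; the column indices are assumed
     sorted within each row */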
2310   for (i=0; i<m; i++) {
2311     nnz     = I[i+1]- I[i];
2312     JJ      = J + I[i];
2313     nnz_max = PetscMax(nnz_max,nnz);
2314     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2315     for (j=0; j<nnz; j++) {
2316       if (*JJ >= cstart) break;
2317       JJ++;
2318     }
2319     d = 0;
2320     for (; j<nnz; j++) {
2321       if (*JJ++ >= cend) break;
2322       d++;
2323     }
2324     d_nnz[i] = d;
2325     o_nnz[i] = nnz - d;
2326   }
2327   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2328   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2329 
2330   if (v) values = (PetscScalar*)v;
2331   else {
2332     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2333     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2334   }
2335 
2336   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2337   for (i=0; i<m; i++) {
2338     ii   = i + rstart;
2339     nnz  = I[i+1]- I[i];
2340     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2341   }
2342   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2343   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2344   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2345 
2346   if (!v) {
2347     ierr = PetscFree(values);CHKERRQ(ierr);
2348   }
2349   PetscFunctionReturn(0);
2350 }
2351 EXTERN_C_END
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2355 /*@
2356    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2357    (the default parallel PETSc format).
2358 
2359    Collective on MPI_Comm
2360 
2361    Input Parameters:
2362 +  B - the matrix
2363 .  i - the indices into j for the start of each local row (starts with zero)
2364 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2365 -  v - optional values in the matrix
2366 
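   Example usage, a minimal sketch (comm, the local sizes m and n, and the
   CSR triple i,j,v with global, per-row sorted column indices are assumed
   to be set up by the caller):

.vb
   Mat B;
   MatCreate(comm,&B);
   MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPIAIJ);
   MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
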
2367    Level: developer
2368 
2369 .keywords: matrix, aij, compressed row, sparse, parallel
2370 
2371 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2372 @*/
2373 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2374 {
2375   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2376 
2377   PetscFunctionBegin;
2378   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2379   if (f) {
2380     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2381   }
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2387 /*@C
2388    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2389    (the default parallel PETSc format).  For good matrix assembly performance
2390    the user should preallocate the matrix storage by setting the parameters
2391    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2392    performance can be increased by more than a factor of 50.
2393 
2394    Collective on MPI_Comm
2395 
2396    Input Parameters:
2397 +  A - the matrix
2398 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2399            (same value is used for all local rows)
2400 .  d_nnz - array containing the number of nonzeros in the various rows of the
2401            DIAGONAL portion of the local submatrix (possibly different for each row)
2402            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2403            The size of this array is equal to the number of local rows, i.e. 'm'.
2404            You must leave room for the diagonal entry even if it is zero.
2405 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2406            submatrix (same value is used for all local rows).
2407 -  o_nnz - array containing the number of nonzeros in the various rows of the
2408            OFF-DIAGONAL portion of the local submatrix (possibly different for
2409            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2410            structure. The size of this array is equal to the number
2411            of local rows, i.e. 'm'.
2412 
2413    If the *_nnz parameter is given then the *_nz parameter is ignored
2414 
2415    The AIJ format (also called the Yale sparse matrix format or
2416    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2417    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2418 
2419    The parallel matrix is partitioned such that the first m0 rows belong to
2420    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2421    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
2422 
2423    The DIAGONAL portion of the local submatrix of a processor can be defined
2424    as the submatrix which is obtained by extracting the part corresponding
2425    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2426    first row that belongs to the processor, and r2 is the last row belonging
2427    to this processor. This is a square mxm matrix. The remaining portion
2428    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2429 
2430    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2431 
2432    Example usage:
2433 
2434    Consider the following 8x8 matrix with 34 non-zero values, that is
2435    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2436    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2437    as follows:
2438 
2439 .vb
2440             1  2  0  |  0  3  0  |  0  4
2441     Proc0   0  5  6  |  7  0  0  |  8  0
2442             9  0 10  | 11  0  0  | 12  0
2443     -------------------------------------
2444            13  0 14  | 15 16 17  |  0  0
2445     Proc1   0 18  0  | 19 20 21  |  0  0
2446             0  0  0  | 22 23  0  | 24  0
2447     -------------------------------------
2448     Proc2  25 26 27  |  0  0 28  | 29  0
2449            30  0  0  | 31 32 33  |  0 34
2450 .ve
2451 
2452    This can be represented as a collection of submatrices as:
2453 
2454 .vb
2455       A B C
2456       D E F
2457       G H I
2458 .ve
2459 
2460    Where the submatrices A,B,C are owned by proc0, D,E,F are
2461    owned by proc1, G,H,I are owned by proc2.
2462 
2463    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2464    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2465    The 'M','N' parameters are 8,8, and have the same values on all procs.
2466 
2467    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2468    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2469    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2470    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2471    part as SeqAIJ matrices; e.g. proc1 will store [E] as one SeqAIJ
2472    matrix, and [DF] as another SeqAIJ matrix.
2473 
2474    When d_nz, o_nz parameters are specified, d_nz storage elements are
2475    allocated for every row of the local diagonal submatrix, and o_nz
2476    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2477    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2478    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2479    In this case, the values of d_nz,o_nz are:
2480 .vb
2481      proc0 : dnz = 2, o_nz = 2
2482      proc1 : dnz = 3, o_nz = 2
2483      proc2 : dnz = 1, o_nz = 4
2484 .ve
2485    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2486    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2487    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2488    34 values.
2489 
2490    When d_nnz, o_nnz parameters are specified, the storage is specified
2491    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2492    In the above case the values for d_nnz,o_nnz are:
2493 .vb
2494      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2495      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2496      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2497 .ve
2498    Here the space allocated is the sum of all the above values, i.e. 34, and
2499    hence pre-allocation is perfect.
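
   For instance, the proc1 values above could be supplied as follows (a
   sketch; comm and the Mat A are assumed declared by the caller):

.vb
   PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
   MatCreate(comm,&A);
   MatSetSizes(A,3,3,8,8);
   MatSetType(A,MATMPIAIJ);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve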
2500 
2501    Level: intermediate
2502 
2503 .keywords: matrix, aij, compressed row, sparse, parallel
2504 
2505 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2506           MPIAIJ
2507 @*/
2508 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2509 {
2510   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2511 
2512   PetscFunctionBegin;
2513   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2514   if (f) {
2515     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2516   }
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatCreateMPIAIJ"
2522 /*@C
2523    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2524    (the default parallel PETSc format).  For good matrix assembly performance
2525    the user should preallocate the matrix storage by setting the parameters
2526    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2527    performance can be increased by more than a factor of 50.
2528 
2529    Collective on MPI_Comm
2530 
2531    Input Parameters:
2532 +  comm - MPI communicator
2533 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2534            This value should be the same as the local size used in creating the
2535            y vector for the matrix-vector product y = Ax.
2536 .  n - This value should be the same as the local size used in creating the
2537        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2538        it calculated if N is given). For square matrices n is almost always m.
2539 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2540 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2541 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2542            (same value is used for all local rows)
2543 .  d_nnz - array containing the number of nonzeros in the various rows of the
2544            DIAGONAL portion of the local submatrix (possibly different for each row)
2545            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2546            The size of this array is equal to the number of local rows, i.e. 'm'.
2547            You must leave room for the diagonal entry even if it is zero.
2548 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2549            submatrix (same value is used for all local rows).
2550 -  o_nnz - array containing the number of nonzeros in the various rows of the
2551            OFF-DIAGONAL portion of the local submatrix (possibly different for
2552            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2553            structure. The size of this array is equal to the number
2554            of local rows, i.e. 'm'.
2555 
2556    Output Parameter:
2557 .  A - the matrix
2558 
2559    Notes:
2560    If the *_nnz parameter is given then the *_nz parameter is ignored
2561 
2562    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2563    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2564    storage requirements for this matrix.
2565 
2566    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2567    processor then it must be used on all processors that share the object for
2568    that argument.
2569 
2570    The user MUST specify either the local or global matrix dimensions
2571    (possibly both).
2572 
2573    The parallel matrix is partitioned such that the first m0 rows belong to
2574    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2575    to process 2 etc., where m0,m1,m2,... are the input parameter 'm'.
2576 
2577    The DIAGONAL portion of the local submatrix of a processor can be defined
2578    as the submatrix which is obtained by extracting the part corresponding
2579    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2580    first row that belongs to the processor, and r2 is the last row belonging
2581    to this processor. This is a square mxm matrix. The remaining portion
2582    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2583 
2584    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2585 
2586    When calling this routine with a single process communicator, a matrix of
2587    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2588    type of communicator, use the construction mechanism:
2589      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2590 
2591    By default, this format uses inodes (identical nodes) when possible.
2592    We search for consecutive rows with the same nonzero structure, thereby
2593    reusing matrix information to achieve increased efficiency.
2594 
2595    Options Database Keys:
2596 +  -mat_no_inode  - Do not use inodes
2597 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2598 -  -mat_aij_oneindex - Internally use indexing starting at 1
2599         rather than 0.  Note that when calling MatSetValues(),
2600         the user still MUST index entries starting at 0!
2601 
2602 
2603    Example usage:
2604 
2605    Consider the following 8x8 matrix with 34 non-zero values, that is
2606    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2607    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2608    as follows:
2609 
2610 .vb
2611             1  2  0  |  0  3  0  |  0  4
2612     Proc0   0  5  6  |  7  0  0  |  8  0
2613             9  0 10  | 11  0  0  | 12  0
2614     -------------------------------------
2615            13  0 14  | 15 16 17  |  0  0
2616     Proc1   0 18  0  | 19 20 21  |  0  0
2617             0  0  0  | 22 23  0  | 24  0
2618     -------------------------------------
2619     Proc2  25 26 27  |  0  0 28  | 29  0
2620            30  0  0  | 31 32 33  |  0 34
2621 .ve
2622 
2623    This can be represented as a collection of submatrices as:
2624 
2625 .vb
2626       A B C
2627       D E F
2628       G H I
2629 .ve
2630 
2631    Where the submatrices A,B,C are owned by proc0, D,E,F are
2632    owned by proc1, G,H,I are owned by proc2.
2633 
2634    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2635    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2636    The 'M','N' parameters are 8,8, and have the same values on all procs.
2637 
2638    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2639    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2640    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2641    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2642    part as SeqAIJ matrices; e.g. proc1 will store [E] as one SeqAIJ
2643    matrix, and [DF] as another SeqAIJ matrix.
2644 
2645    When d_nz, o_nz parameters are specified, d_nz storage elements are
2646    allocated for every row of the local diagonal submatrix, and o_nz
2647    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2648    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2649    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2650    In this case, the values of d_nz,o_nz are:
2651 .vb
2652      proc0 : dnz = 2, o_nz = 2
2653      proc1 : dnz = 3, o_nz = 2
2654      proc2 : dnz = 1, o_nz = 4
2655 .ve
2656    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2657    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2658    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2659    34 values.
2660 
2661    When d_nnz, o_nnz parameters are specified, the storage is specified
2662    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2663    In the above case the values for d_nnz,o_nnz are:
2664 .vb
2665      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2666      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2667      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2668 .ve
2669    Here the space allocated is the sum of all the above values, i.e. 34, and
2670    hence pre-allocation is perfect.
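
   With MatCreateMPIAIJ() the proc1 setup above collapses to a single call (a
   sketch; comm and the Mat A are assumed declared by the caller):

.vb
   MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve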
2671 
2672    Level: intermediate
2673 
2674 .keywords: matrix, aij, compressed row, sparse, parallel
2675 
2676 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2677           MPIAIJ
2678 @*/
2679 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2680 {
2681   PetscErrorCode ierr;
2682   PetscMPIInt    size;
2683 
2684   PetscFunctionBegin;
2685   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2686   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2687   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2688   if (size > 1) {
2689     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2690     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2691   } else {
2692     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2693     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2694   }
2695   PetscFunctionReturn(0);
2696 }
2697 
2698 #undef __FUNCT__
2699 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2700 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2701 {
2702   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2703 
2704   PetscFunctionBegin;
2705   *Ad     = a->A;
2706   *Ao     = a->B;
2707   *colmap = a->garray;
2708   PetscFunctionReturn(0);
2709 }
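
/*
   Usage sketch (illustrative; not part of the library). The two sequential
   blocks and the mapping from local columns of the off-diagonal block to
   global columns of A can be obtained as

      Mat      Ad,Ao;
      PetscInt *colmap;
      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);

   where column j of Ao corresponds to global column colmap[j] of A.
*/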
2710 
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2713 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2714 {
2715   PetscErrorCode ierr;
2716   PetscInt       i;
2717   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2718 
2719   PetscFunctionBegin;
2720   if (coloring->ctype == IS_COLORING_LOCAL) {
2721     ISColoringValue *allcolors,*colors;
2722     ISColoring      ocoloring;
2723 
2724     /* set coloring for diagonal portion */
2725     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2726 
2727     /* set coloring for off-diagonal portion */
2728     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2729     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2730     for (i=0; i<a->B->cmap.n; i++) {
2731       colors[i] = allcolors[a->garray[i]];
2732     }
2733     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2734     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
2735     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2736     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2737   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2738     ISColoringValue *colors;
2739     PetscInt        *larray;
2740     ISColoring      ocoloring;
2741 
2742     /* set coloring for diagonal portion */
2743     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2744     for (i=0; i<a->A->cmap.n; i++) {
2745       larray[i] = i + A->cmap.rstart;
2746     }
2747     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2748     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2749     for (i=0; i<a->A->cmap.n; i++) {
2750       colors[i] = coloring->colors[larray[i]];
2751     }
2752     ierr = PetscFree(larray);CHKERRQ(ierr);
2753     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
2754     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2755     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2756 
2757     /* set coloring for off-diagonal portion */
2758     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2759     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2760     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2761     for (i=0; i<a->B->cmap.n; i++) {
2762       colors[i] = coloring->colors[larray[i]];
2763     }
2764     ierr = PetscFree(larray);CHKERRQ(ierr);
2765     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
2766     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2767     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2768   } else {
2769     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2770   }
2771 
2772   PetscFunctionReturn(0);
2773 }
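/*
   A hedged usage sketch (illustrative; assumes the MatGetColoring()/
   MatSetColoring() interface of this PETSc generation and an assembled
   MPIAIJ matrix A): this routine is normally reached through
   MatSetColoring() once a coloring has been computed, e.g.

     ISColoring coloring;
     ierr = MatGetColoring(A,MATCOLORING_SL,&coloring);CHKERRQ(ierr);
     ierr = MatSetColoring(A,coloring);CHKERRQ(ierr);
     ierr = ISColoringDestroy(coloring);CHKERRQ(ierr);
*/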
2774 
2775 #if defined(PETSC_HAVE_ADIC)
2776 #undef __FUNCT__
2777 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2778 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2779 {
2780   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2781   PetscErrorCode ierr;
2782 
2783   PetscFunctionBegin;
2784   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2785   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2786   PetscFunctionReturn(0);
2787 }
2788 #endif
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2792 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2793 {
2794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2795   PetscErrorCode ierr;
2796 
2797   PetscFunctionBegin;
2798   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2799   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatMerge"
2805 /*@C
2806       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2807                  matrices from each processor
2808 
2809     Collective on MPI_Comm
2810 
2811    Input Parameters:
2812 +    comm - the communicator the parallel matrix will live on
2813 .    inmat - the input sequential matrix (one per process)
2814 .    n - number of local columns (or PETSC_DECIDE)
2815 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2816 
2817    Output Parameter:
2818 .    outmat - the parallel matrix generated
2819 
2820     Level: advanced
2821 
2822    Notes: The number of columns of the matrix on EACH processor MUST be the same.
2823 
2824 @*/
2825 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2826 {
2827   PetscErrorCode ierr;
2828   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2829   PetscInt       *indx;
2830   PetscScalar    *values;
2831 
2832   PetscFunctionBegin;
2833   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2834   if (scall == MAT_INITIAL_MATRIX){
2835     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2836     if (n == PETSC_DECIDE){
2837       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
2838     }
2839     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2840     rstart -= m;
2841 
2842     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2843     for (i=0;i<m;i++) {
2844       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2845       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2846       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2847     }
2848     /* This routine will ONLY return an MPIAIJ type matrix */
2849     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2850     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2851     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2852     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2853     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2854 
2855   } else if (scall == MAT_REUSE_MATRIX){
2856     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2857   } else {
2858     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2859   }
2860 
2861   for (i=0;i<m;i++) {
2862     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2863     I    = i + rstart;
2864     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2865     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2866   }
2867   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2868   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2869   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2870 
2871   PetscFunctionReturn(0);
2872 }
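/*
   A minimal calling sketch (illustrative only): every process supplies a
   SeqAIJ matrix inmat with the SAME number of columns; MatMerge() stacks
   them by rows into one parallel MPIAIJ matrix and destroys inmat itself.

     Mat outmat;
     ierr = MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
*/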
2873 
2874 #undef __FUNCT__
2875 #define __FUNCT__ "MatFileSplit"
2876 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2877 {
2878   PetscErrorCode    ierr;
2879   PetscMPIInt       rank;
2880   PetscInt          m,N,i,rstart,nnz;
2881   size_t            len;
2882   const PetscInt    *indx;
2883   PetscViewer       out;
2884   char              *name;
2885   Mat               B;
2886   const PetscScalar *values;
2887 
2888   PetscFunctionBegin;
2889   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2890   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2891   /* Should this be the type of the diagonal block of A? */
2892   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2893   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2894   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2895   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2896   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2897   for (i=0;i<m;i++) {
2898     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2899     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2900     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2901   }
2902   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2903   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2904 
2905   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2906   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2907   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for ".<rank>" even with many processes */
2908   sprintf(name,"%s.%d",outfile,rank);
2909   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
2910   ierr = PetscFree(name);CHKERRQ(ierr);
2911   ierr = MatView(B,out);CHKERRQ(ierr);
2912   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2913   ierr = MatDestroy(B);CHKERRQ(ierr);
2914   PetscFunctionReturn(0);
2915 }
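/*
   Usage sketch (illustrative): writes the local rows of the parallel matrix
   A to one sequential binary file per process, named <outfile>.<rank>.

     ierr = MatFileSplit(A,"amat");CHKERRQ(ierr);    (produces amat.0, amat.1, ...)
*/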
2916 
2917 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2918 #undef __FUNCT__
2919 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2920 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2921 {
2922   PetscErrorCode       ierr;
2923   Mat_Merge_SeqsToMPI  *merge;
2924   PetscObjectContainer container;
2925 
2926   PetscFunctionBegin;
2927   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2928   if (container) {
2929     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2930     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2931     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2932     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2933     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2934     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2935     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2936     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2937     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
2938     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
2939     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
2940 
2941     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2942     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2943     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container exists */
2944   }
2945 
2946   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2947   PetscFunctionReturn(0);
2948 }
2949 
2950 #include "src/mat/utils/freespace.h"
2951 #include "petscbt.h"
2952 static PetscEvent logkey_seqstompinum = 0;
2953 #undef __FUNCT__
2954 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2955 /*@C
2956       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
2957                  matrices from each processor
2958 
2959     Collective on MPI_Comm
2960 
2961    Input Parameters:
2962 +    comm - the communicator the parallel matrix will live on
2963 .    seqmat - the input sequential matrix on each process
2964 .    m - number of local rows (or PETSC_DECIDE)
2965 .    n - number of local columns (or PETSC_DECIDE)
2966 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2967 
2968    Output Parameter:
2969 .    mpimat - the parallel matrix generated
2970 
2971     Level: advanced
2972 
2973    Notes:
2974      The dimensions of the sequential matrix in each processor MUST be the same.
2975      The input seqmat is placed in the container "Mat_Merge_SeqsToMPI" and will be
2976      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2977 @*/
2978 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2979 {
2980   PetscErrorCode       ierr;
2981   MPI_Comm             comm=mpimat->comm;
2982   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2983   PetscMPIInt          size,rank,taga,*len_s;
2984   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
2985   PetscInt             proc,m;
2986   PetscInt             **buf_ri,**buf_rj;
2987   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2988   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2989   MPI_Request          *s_waits,*r_waits;
2990   MPI_Status           *status;
2991   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2992   Mat_Merge_SeqsToMPI  *merge;
2993   PetscObjectContainer container;
2994 
2995   PetscFunctionBegin;
2996   if (!logkey_seqstompinum) {
2997     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2998   }
2999   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3000 
3001   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3002   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3003 
3004   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3005   if (container) {
3006     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3007   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat must be created by MatMerge_SeqsToMPISymbolic()");
3008   bi     = merge->bi;
3009   bj     = merge->bj;
3010   buf_ri = merge->buf_ri;
3011   buf_rj = merge->buf_rj;
3012 
3013   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3014   owners = merge->rowmap.range;
3015   len_s  = merge->len_s;
3016 
3017   /* send and recv matrix values */
3018   /*-----------------------------*/
3019   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3020   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3021 
3022   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3023   for (proc=0,k=0; proc<size; proc++){
3024     if (!len_s[proc]) continue;
3025     i = owners[proc];
3026     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3027     k++;
3028   }
3029 
3030   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3031   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3032   ierr = PetscFree(status);CHKERRQ(ierr);
3033 
3034   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3035   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3036 
3037   /* insert mat values of mpimat */
3038   /*----------------------------*/
3039   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3040   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3041   nextrow = buf_ri_k + merge->nrecv;
3042   nextai  = nextrow + merge->nrecv;
3043 
3044   for (k=0; k<merge->nrecv; k++){
3045     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3046     nrows = *(buf_ri_k[k]);
3047     nextrow[k]  = buf_ri_k[k]+1;  /* next row number in the k-th received i-structure */
3048     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure entry of the k-th received message */
3049   }
3050 
3051   /* set values of ba */
3052   m = merge->rowmap.n;
3053   for (i=0; i<m; i++) {
3054     arow = owners[rank] + i;
3055     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3056     bnzi = bi[i+1] - bi[i];
3057     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3058 
3059     /* add local non-zero vals of this proc's seqmat into ba */
3060     anzi = ai[arow+1] - ai[arow];
3061     aj   = a->j + ai[arow];
3062     aa   = a->a + ai[arow];
3063     nextaj = 0;
3064     for (j=0; nextaj<anzi; j++){
3065       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3066         ba_i[j] += aa[nextaj++];
3067       }
3068     }
3069 
3070     /* add received vals into ba */
3071     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3072       /* i-th row */
3073       if (i == *nextrow[k]) {
3074         anzi = *(nextai[k]+1) - *nextai[k];
3075         aj   = buf_rj[k] + *(nextai[k]);
3076         aa   = abuf_r[k] + *(nextai[k]);
3077         nextaj = 0;
3078         for (j=0; nextaj<anzi; j++){
3079           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3080             ba_i[j] += aa[nextaj++];
3081           }
3082         }
3083         nextrow[k]++; nextai[k]++;
3084       }
3085     }
3086     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3087   }
3088   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3089   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3090 
3091   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3092   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3093   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3094   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 static PetscEvent logkey_seqstompisym = 0;
3099 #undef __FUNCT__
3100 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3101 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3102 {
3103   PetscErrorCode       ierr;
3104   Mat                  B_mpi;
3105   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3106   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3107   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3108   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3109   PetscInt             len,proc,*dnz,*onz;
3110   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3111   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3112   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3113   MPI_Status           *status;
3114   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3115   PetscBT              lnkbt;
3116   Mat_Merge_SeqsToMPI  *merge;
3117   PetscObjectContainer container;
3118 
3119   PetscFunctionBegin;
3120   if (!logkey_seqstompisym) {
3121     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3122   }
3123   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3124 
3125   /* make sure it is a PETSc comm */
3126   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3127   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3128   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3129 
3130   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3131   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3132 
3133   /* determine row ownership */
3134   /*---------------------------------------------------------*/
3135   merge->rowmap.n = m;
3136   merge->rowmap.N = M;
3137   merge->rowmap.bs = 1;
3138   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3139   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3140   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3141 
3142   m      = merge->rowmap.n;
3143   M      = merge->rowmap.N;
3144   owners = merge->rowmap.range;
3145 
3146   /* determine the number of messages to send, their lengths */
3147   /*---------------------------------------------------------*/
3148   len_s  = merge->len_s;
3149 
3150   len = 0;  /* length of buf_si[] */
3151   merge->nsend = 0;
3152   for (proc=0; proc<size; proc++){
3153     len_si[proc] = 0;
3154     if (proc == rank){
3155       len_s[proc] = 0;
3156     } else {
3157       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3158       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* number of nonzeros to be sent to [proc] */
3159     }
3160     if (len_s[proc]) {
3161       merge->nsend++;
3162       nrows = 0;
3163       for (i=owners[proc]; i<owners[proc+1]; i++){
3164         if (ai[i+1] > ai[i]) nrows++;
3165       }
3166       len_si[proc] = 2*(nrows+1);
3167       len += len_si[proc];
3168     }
3169   }
3170 
3171   /* determine the number and length of messages to receive for ij-structure */
3172   /*-------------------------------------------------------------------------*/
3173   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3174   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3175 
3176   /* post the Irecv of j-structure */
3177   /*-------------------------------*/
3178   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); /* *mpimat does not exist yet; take a tag from the duplicated PETSc communicator */
3179   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3180 
3181   /* post the Isend of j-structure */
3182   /*--------------------------------*/
3183   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3184   sj_waits = si_waits + merge->nsend;
3185 
3186   for (proc=0, k=0; proc<size; proc++){
3187     if (!len_s[proc]) continue;
3188     i = owners[proc];
3189     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3190     k++;
3191   }
3192 
3193   /* receives and sends of j-structure are complete */
3194   /*------------------------------------------------*/
3195   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3196   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3197 
3198   /* send and recv i-structure */
3199   /*---------------------------*/
3200   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); /* merge is not a PetscObject; take a tag from the communicator */
3201   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3202 
3203   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3204   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3205   for (proc=0,k=0; proc<size; proc++){
3206     if (!len_s[proc]) continue;
3207     /* form outgoing message for i-structure:
3208          buf_si[0]:                 nrows to be sent
3209                [1:nrows]:           row index (global)
3210                [nrows+1:2*nrows+1]: i-structure index
3211     */
3212     /*-------------------------------------------*/
3213     nrows = len_si[proc]/2 - 1;
3214     buf_si_i    = buf_si + nrows+1;
3215     buf_si[0]   = nrows;
3216     buf_si_i[0] = 0;
3217     nrows = 0;
3218     for (i=owners[proc]; i<owners[proc+1]; i++){
3219       anzi = ai[i+1] - ai[i];
3220       if (anzi) {
3221         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3222         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3223         nrows++;
3224       }
3225     }
3226     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3227     k++;
3228     buf_si += len_si[proc];
3229   }
3230 
3231   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3232   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3233 
3234   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3235   for (i=0; i<merge->nrecv; i++){
3236     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3237   }
3238 
3239   ierr = PetscFree(len_si);CHKERRQ(ierr);
3240   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3241   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3242   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3243   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3244   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3245   ierr = PetscFree(status);CHKERRQ(ierr);
3246 
3247   /* compute a local seq matrix in each processor */
3248   /*----------------------------------------------*/
3249   /* allocate bi array and free space for accumulating nonzero column info */
3250   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3251   bi[0] = 0;
3252 
3253   /* create and initialize a linked list */
3254   nlnk = N+1;
3255   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3256 
3257   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3258   len  = ai[owners[rank+1]] - ai[owners[rank]];
3260   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3261   current_space = free_space;
3262 
3263   /* determine symbolic info for each local row */
3264   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3265   nextrow = buf_ri_k + merge->nrecv;
3266   nextai  = nextrow + merge->nrecv;
3267   for (k=0; k<merge->nrecv; k++){
3268     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3269     nrows = *buf_ri_k[k];
3270     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number in the k-th received i-structure */
3271     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure entry of the k-th received message */
3272   }
3273 
3274   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3275   len = 0;
3276   for (i=0;i<m;i++) {
3277     bnzi   = 0;
3278     /* add local non-zero cols of this proc's seqmat into lnk */
3279     arow   = owners[rank] + i;
3280     anzi   = ai[arow+1] - ai[arow];
3281     aj     = a->j + ai[arow];
3282     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3283     bnzi += nlnk;
3284     /* add received col data into lnk */
3285     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3286       if (i == *nextrow[k]) { /* i-th row */
3287         anzi = *(nextai[k]+1) - *nextai[k];
3288         aj   = buf_rj[k] + *nextai[k];
3289         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3290         bnzi += nlnk;
3291         nextrow[k]++; nextai[k]++;
3292       }
3293     }
3294     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3295 
3296     /* if free space is not available, make more free space */
3297     if (current_space->local_remaining<bnzi) {
3298       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3299       nspacedouble++;
3300     }
3301     /* copy data into free space, then initialize lnk */
3302     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3303     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3304 
3305     current_space->array           += bnzi;
3306     current_space->local_used      += bnzi;
3307     current_space->local_remaining -= bnzi;
3308 
3309     bi[i+1] = bi[i] + bnzi;
3310   }
3311 
3312   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3313 
3314   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3315   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3316   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3317 
3318   /* create symbolic parallel matrix B_mpi */
3319   /*---------------------------------------*/
3320   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3321   if (n==PETSC_DECIDE) {
3322     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3323   } else {
3324     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3325   }
3326   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3327   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3328   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3329 
3330   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3331   B_mpi->assembled     = PETSC_FALSE;
3332   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3333   merge->bi            = bi;
3334   merge->bj            = bj;
3335   merge->buf_ri        = buf_ri;
3336   merge->buf_rj        = buf_rj;
3337   merge->coi           = PETSC_NULL;
3338   merge->coj           = PETSC_NULL;
3339   merge->owners_co     = PETSC_NULL;
3340 
3341   /* attach the supporting struct to B_mpi for reuse */
3342   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3343   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3344   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3345   *mpimat = B_mpi;
3346 
3347   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3348   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3349   PetscFunctionReturn(0);
3350 }
3351 
3352 static PetscEvent logkey_seqstompi = 0;
3353 #undef __FUNCT__
3354 #define __FUNCT__ "MatMerge_SeqsToMPI"
3355 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3356 {
3357   PetscErrorCode   ierr;
3358 
3359   PetscFunctionBegin;
3360   if (!logkey_seqstompi) {
3361     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3362   }
3363   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3364   if (scall == MAT_INITIAL_MATRIX){
3365     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3366   }
3367   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3368   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3369   PetscFunctionReturn(0);
3370 }
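/*
   Typical reuse pattern (a sketch, error checking abbreviated): the symbolic
   phase runs once with MAT_INITIAL_MATRIX; subsequent calls with
   MAT_REUSE_MATRIX redo only the numeric assembly, using the ij-structure
   cached in the "MatMergeSeqsToMPI" container attached to C.

     Mat C;
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ...update the numerical values of seqmat, same nonzero pattern...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,m,n,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/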
3371 static PetscEvent logkey_getlocalmat = 0;
3372 #undef __FUNCT__
3373 #define __FUNCT__ "MatGetLocalMat"
3374 /*@C
3375      MatGetLocalMat - Creates a SeqAIJ matrix from all the local rows of a parallel MPIAIJ matrix
3376 
3377     Not Collective
3378 
3379    Input Parameters:
3380 +    A - the matrix
3381 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3382 
3383    Output Parameter:
3384 .    A_loc - the local sequential matrix generated
3385 
3386     Level: developer
3387 
3388 @*/
3389 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3390 {
3391   PetscErrorCode  ierr;
3392   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3393   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3394   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3395   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3396   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3397   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3398 
3399   PetscFunctionBegin;
3400   if (!logkey_getlocalmat) {
3401     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3402   }
3403   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3404   if (scall == MAT_INITIAL_MATRIX){
3405     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3406     ci[0] = 0;
3407     for (i=0; i<am; i++){
3408       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3409     }
3410     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3411     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3412     k = 0;
3413     for (i=0; i<am; i++) {
3414       ncols_o = bi[i+1] - bi[i];
3415       ncols_d = ai[i+1] - ai[i];
3416       /* off-diagonal portion of A */
3417       for (jo=0; jo<ncols_o; jo++) {
3418         col = cmap[*bj];
3419         if (col >= cstart) break;
3420         cj[k]   = col; bj++;
3421         ca[k++] = *ba++;
3422       }
3423       /* diagonal portion of A */
3424       for (j=0; j<ncols_d; j++) {
3425         cj[k]   = cstart + *aj++;
3426         ca[k++] = *aa++;
3427       }
3428       /* off-diagonal portion of A */
3429       for (j=jo; j<ncols_o; j++) {
3430         cj[k]   = cmap[*bj++];
3431         ca[k++] = *ba++;
3432       }
3433     }
3434     /* put together the new matrix */
3435     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3436     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3437     /* Since these are PETSc arrays, change flags to free them as necessary. */
3438     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3439     mat->freedata = PETSC_TRUE;
3440     mat->nonew    = 0;
3441   } else if (scall == MAT_REUSE_MATRIX){
3442     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3443     ci = mat->i; cj = mat->j; ca = mat->a;
3444     for (i=0; i<am; i++) {
3445       /* off-diagonal portion of A */
3446       ncols_o = bi[i+1] - bi[i];
3447       for (jo=0; jo<ncols_o; jo++) {
3448         col = cmap[*bj];
3449         if (col >= cstart) break;
3450         *ca++ = *ba++; bj++;
3451       }
3452       /* diagonal portion of A */
3453       ncols_d = ai[i+1] - ai[i];
3454       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3455       /* off-diagonal portion of A */
3456       for (j=jo; j<ncols_o; j++) {
3457         *ca++ = *ba++; bj++;
3458       }
3459     }
3460   } else {
3461     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3462   }
3463 
3464   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3465   PetscFunctionReturn(0);
3466 }
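/*
   Usage sketch (illustrative): gather the local rows of an MPIAIJ matrix A
   into one SeqAIJ matrix with global column numbering, then refresh the
   values in place once A changes numerically but not structurally.

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ...A gets new values, same nonzero pattern...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/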
3467 
3468 static PetscEvent logkey_getlocalmatcondensed = 0;
3469 #undef __FUNCT__
3470 #define __FUNCT__ "MatGetLocalMatCondensed"
3471 /*@C
3472      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows and NON-ZERO columns of a parallel MPIAIJ matrix
3473 
3474     Not Collective
3475 
3476    Input Parameters:
3477 +    A - the matrix
3478 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3479 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3480 
3481    Output Parameter:
3482 .    A_loc - the local sequential matrix generated
3483 
3484     Level: developer
3485 
3486 @*/
3487 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3488 {
3489   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3490   PetscErrorCode    ierr;
3491   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3492   IS                isrowa,iscola;
3493   Mat               *aloc;
3494 
3495   PetscFunctionBegin;
3496   if (!logkey_getlocalmatcondensed) {
3497     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3498   }
3499   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3500   if (!row){
3501     start = A->rmap.rstart; end = A->rmap.rend;
3502     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3503   } else {
3504     isrowa = *row;
3505   }
3506   if (!col){
3507     start = A->cmap.rstart;
3508     cmap  = a->garray;
3509     nzA   = a->A->cmap.n;
3510     nzB   = a->B->cmap.n;
3511     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3512     ncols = 0;
3513     for (i=0; i<nzB; i++) {
3514       if (cmap[i] < start) idx[ncols++] = cmap[i];
3515       else break;
3516     }
3517     imark = i;
3518     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3519     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3520     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3521     ierr = PetscFree(idx);CHKERRQ(ierr);
3522   } else {
3523     iscola = *col;
3524   }
3525   if (scall != MAT_INITIAL_MATRIX){
3526     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3527     aloc[0] = *A_loc;
3528   }
3529   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3530   *A_loc = aloc[0];
3531   ierr = PetscFree(aloc);CHKERRQ(ierr);
3532   if (!row){
3533     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3534   }
3535   if (!col){
3536     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3537   }
3538   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3539   PetscFunctionReturn(0);
3540 }
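/*
   Usage sketch (illustrative): with row = col = PETSC_NULL the routine
   builds the index sets itself, taking all local rows and only the columns
   in which the local part of A has nonzeros.

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/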
3541 
3542 static PetscEvent logkey_GetBrowsOfAcols = 0;
3543 #undef __FUNCT__
3544 #define __FUNCT__ "MatGetBrowsOfAcols"
3545 /*@C
3546     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3547 
3548     Collective on Mat
3549 
3550    Input Parameters:
3551 +    A,B - the matrices in mpiaij format
3552 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3553 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3554 
3555    Output Parameter:
3556 +    rowb, colb - index sets of rows and columns of B to extract
3557 .    brstart - index of the first row of B_seq that comes from B's own local rows
3558 -    B_seq - the sequential matrix generated
3559 
3560     Level: developer
3561 
3562 @*/
3563 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3564 {
3565   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3566   PetscErrorCode    ierr;
3567   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3568   IS                isrowb,iscolb;
3569   Mat               *bseq;
3570 
3571   PetscFunctionBegin;
3572   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3573     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3574   }
3575   if (!logkey_GetBrowsOfAcols) {
3576     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3577   }
3578   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3579 
3580   if (scall == MAT_INITIAL_MATRIX){
3581     start = A->cmap.rstart;
3582     cmap  = a->garray;
3583     nzA   = a->A->cmap.n;
3584     nzB   = a->B->cmap.n;
3585     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3586     ncols = 0;
3587     for (i=0; i<nzB; i++) {  /* row < local row index */
3588       if (cmap[i] < start) idx[ncols++] = cmap[i];
3589       else break;
3590     }
3591     imark = i;
3592     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3593     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3594     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3595     ierr = PetscFree(idx);CHKERRQ(ierr);
3596     *brstart = imark;
3597     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
3598   } else {
3599     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3600     isrowb = *rowb; iscolb = *colb;
3601     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3602     bseq[0] = *B_seq;
3603   }
3604   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3605   *B_seq = bseq[0];
3606   ierr = PetscFree(bseq);CHKERRQ(ierr);
3607   if (!rowb){
3608     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3609   } else {
3610     *rowb = isrowb;
3611   }
3612   if (!colb){
3613     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3614   } else {
3615     *colb = iscolb;
3616   }
3617   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3618   PetscFunctionReturn(0);
3619 }
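/*
   Usage sketch (illustrative, as in C = A*B kernels): fetch the rows of B
   matching the nonzero columns of this process's part of A.  The index sets
   created by the first call must be passed back for MAT_REUSE_MATRIX.

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ...later, same pattern, new values...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/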
3620 
3621 static PetscEvent logkey_GetBrowsOfAocols = 0;
3622 #undef __FUNCT__
3623 #define __FUNCT__ "MatGetBrowsOfAoCols"
3624 /*@C
3625     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3626     of the OFF-DIAGONAL portion of local A
3627 
3628     Collective on Mat
3629 
3630    Input Parameters:
3631 +    A,B - the matrices in mpiaij format
3632 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3633 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3634 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3635 
3636    Output Parameter:
3637 .    B_oth - the sequential matrix generated
3638 
3639     Level: developer
3640 
3641 @*/
3642 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3643 {
3644   VecScatter_MPI_General *gen_to,*gen_from;
3645   PetscErrorCode         ierr;
3646   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
3647   Mat_SeqAIJ             *b_oth;
3648   VecScatter             ctx=a->Mvctx;
3649   MPI_Comm               comm=ctx->comm;
3650   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3651   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
3652   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3653   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3654   MPI_Request            *rwaits,*swaits;
3655   MPI_Status             *sstatus,rstatus;
3656   PetscInt               *cols;
3657   PetscScalar            *vals;
3658   PetscMPIInt            j;
3659 
3660   PetscFunctionBegin;
3661   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3662     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3663   }
3664   if (!logkey_GetBrowsOfAocols) {
3665     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3666   }
3667   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3668   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3669 
3670   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3671   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3672   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3673   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3674   nrecvs   = gen_from->n;
3675   nsends   = gen_to->n;
3676   rwaits   = gen_from->requests;
3677   swaits   = gen_to->requests;
3678   srow     = gen_to->indices;   /* local row index to be sent */
3679   rstarts  = gen_from->starts;
3680   sstarts  = gen_to->starts;
3681   rprocs   = gen_from->procs;
3682   sprocs   = gen_to->procs;
3683   sstatus  = gen_to->sstatus;
3684 
3685   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3686   if (scall == MAT_INITIAL_MATRIX){
3687     /* i-array */
3688     /*---------*/
3689     /*  post receives */
3690     for (i=0; i<nrecvs; i++){
3691       rowlen = (PetscInt*)rvalues + rstarts[i];
3692       nrows = rstarts[i+1]-rstarts[i];
3693       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3694     }
3695 
3696     /* pack the outgoing message */
3697     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3698     rstartsj = sstartsj + nsends +1;
3699     sstartsj[0] = 0;  rstartsj[0] = 0;
3700     len = 0; /* total length of j or a array to be sent */
3701     k = 0;
3702     for (i=0; i<nsends; i++){
3703       rowlen = (PetscInt*)svalues + sstarts[i];
3704       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3705       for (j=0; j<nrows; j++) {
3706         row = srow[k] + B->rmap.range[rank]; /* global row idx */
3707         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3708         len += rowlen[j];
3709         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3710         k++;
3711       }
3712       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3713       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3714     }
3715     /* recvs and sends of i-array are completed */
3716     i = nrecvs;
3717     while (i--) {
3718       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3719     }
3720     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3721     /* allocate buffers for sending j and a arrays */
3722     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3723     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3724 
3725     /* create i-array of B_oth */
3726     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3727     b_othi[0] = 0;
3728     len = 0; /* total length of j or a array to be received */
3729     k = 0;
3730     for (i=0; i<nrecvs; i++){
3731       rowlen = (PetscInt*)rvalues + rstarts[i];
3732       nrows = rstarts[i+1]-rstarts[i];
3733       for (j=0; j<nrows; j++) {
3734         b_othi[k+1] = b_othi[k] + rowlen[j];
3735         len += rowlen[j]; k++;
3736       }
3737       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3738     }
3739 
3740     /* allocate space for j and a arrays of B_oth */
3741     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3742     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3743 
3744     /* j-array */
3745     /*---------*/
3746     /*  post receives of j-array */
3747     for (i=0; i<nrecvs; i++){
3748       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3749       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3750     }
3751     k = 0;
3752     for (i=0; i<nsends; i++){
3753       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3754       bufJ = bufj+sstartsj[i];
3755       for (j=0; j<nrows; j++) {
3756         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3757         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3758         for (l=0; l<ncols; l++){
3759           *bufJ++ = cols[l];
3760         }
3761         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3762       }
3763       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3764     }
3765 
3766     /* recvs and sends of j-array are completed */
3767     i = nrecvs;
3768     while (i--) {
3769       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3770     }
3771     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3772   } else if (scall == MAT_REUSE_MATRIX){
3773     sstartsj = *startsj;
3774     rstartsj = sstartsj + nsends +1;
3775     bufa     = *bufa_ptr;
3776     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3777     b_otha   = b_oth->a;
3778   } else {
3779     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3780   }
3781 
3782   /* a-array */
3783   /*---------*/
3784   /*  post receives of a-array */
3785   for (i=0; i<nrecvs; i++){
3786     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3787     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3788   }
3789   k = 0;
3790   for (i=0; i<nsends; i++){
3791     nrows = sstarts[i+1]-sstarts[i];
3792     bufA = bufa+sstartsj[i];
3793     for (j=0; j<nrows; j++) {
3794       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3795       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3796       for (l=0; l<ncols; l++){
3797         *bufA++ = vals[l];
3798       }
3799       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3800 
3801     }
3802     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3803   }
3804   /* recvs and sends of a-array are completed */
3805   i = nrecvs;
3806   while (i--) {
3807     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3808   }
3809   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3810 
3811   if (scall == MAT_INITIAL_MATRIX){
3812     /* put together the new matrix */
3813     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3814 
3815     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3816     /* Since these are PETSc arrays, change flags to free them as necessary. */
3817     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3818     b_oth->freedata = PETSC_TRUE;
3819     b_oth->nonew    = 0;
3820 
3821     ierr = PetscFree(bufj);CHKERRQ(ierr);
3822     if (!startsj || !bufa_ptr){
3823       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3824       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself, not the caller's pointer */
3825     } else {
3826       *startsj  = sstartsj;
3827       *bufa_ptr = bufa;
3828     }
3829   }
3830   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3831 
3832   PetscFunctionReturn(0);
3833 }
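/*
   Usage sketch (illustrative): same idea restricted to the OFF-DIAGONAL
   columns of A; startsj and bufa are communication buffers created by the
   MAT_INITIAL_MATRIX call and handed back for MAT_REUSE_MATRIX calls.

     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
*/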
3834 
3835 #undef __FUNCT__
3836 #define __FUNCT__ "MatGetCommunicationStructs"
3837 /*@C
3838   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
3839 
3840   Not Collective
3841 
3842   Input Parameters:
3843 . A - The matrix in mpiaij format
3844 
3845   Output Parameter:
3846 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
3847 . colmap - A map from global column index to local index into lvec
3848 - multScatter - A scatter from the argument of a matrix-vector product to lvec
3849 
3850   Level: developer
3851 
3852 @*/
3853 #if defined (PETSC_USE_CTABLE)
3854 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
3855 #else
3856 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
3857 #endif
3858 {
3859   Mat_MPIAIJ *a;
3860 
3861   PetscFunctionBegin;
3862   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
3863   PetscValidPointer(lvec, 2);
3864   PetscValidPointer(colmap, 3);
3865   PetscValidPointer(multScatter, 4);
3866   a = (Mat_MPIAIJ *) A->data;
3867   if (lvec) *lvec = a->lvec;
3868   if (colmap) *colmap = a->colmap;
3869   if (multScatter) *multScatter = a->Mvctx;
3870   PetscFunctionReturn(0);
3871 }
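/*
   Usage sketch (illustrative): the outputs are references into A's internal
   communication machinery, valid only while A lives; do not destroy them.

     Vec        lvec;
     PetscInt   *colmap;      (a PetscTable when PETSC_USE_CTABLE is defined)
     VecScatter sct;
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&sct);CHKERRQ(ierr);
*/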
3872 
3873 /*MC
3874    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3875 
3876    Options Database Keys:
3877 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3878 
3879   Level: beginner
3880 
3881 .seealso: MatCreateMPIAIJ
3882 M*/
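/*
   A minimal creation sketch for this type (illustrative; m, n, d_nz, and
   o_nz are placeholders chosen by the application):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/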
3883 
3884 EXTERN_C_BEGIN
3885 #undef __FUNCT__
3886 #define __FUNCT__ "MatCreate_MPIAIJ"
3887 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3888 {
3889   Mat_MPIAIJ     *b;
3890   PetscErrorCode ierr;
3891   PetscMPIInt    size;
3892 
3893   PetscFunctionBegin;
3894   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3895 
3896   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3897   B->data         = (void*)b;
3898   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3899   B->factor       = 0;
3900   B->rmap.bs      = 1;
3901   B->assembled    = PETSC_FALSE;
3902   B->mapping      = 0;
3903 
3904   B->insertmode      = NOT_SET_VALUES;
3905   b->size            = size;
3906   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3907 
3908   /* build cache for off array entries formed */
3909   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3910   b->donotstash  = PETSC_FALSE;
3911   b->colmap      = 0;
3912   b->garray      = 0;
3913   b->roworiented = PETSC_TRUE;
3914 
3915   /* stuff used for matrix vector multiply */
3916   b->lvec      = PETSC_NULL;
3917   b->Mvctx     = PETSC_NULL;
3918 
3919   /* stuff for MatGetRow() */
3920   b->rowindices   = 0;
3921   b->rowvalues    = 0;
3922   b->getrowactive = PETSC_FALSE;
3923 
3924 
3925   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3926                                      "MatStoreValues_MPIAIJ",
3927                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3928   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3929                                      "MatRetrieveValues_MPIAIJ",
3930                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3931   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3932 				     "MatGetDiagonalBlock_MPIAIJ",
3933                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3934   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3935 				     "MatIsTranspose_MPIAIJ",
3936 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3937   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3938 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3939 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3940   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3941 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3942 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3943   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3944 				     "MatDiagonalScaleLocal_MPIAIJ",
3945 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3946   PetscFunctionReturn(0);
3947 }
3948 EXTERN_C_END
3949 
3950 /*
3951     Special version for direct calls from Fortran
3952 */
3953 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3954 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
3955 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3956 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
3957 #endif
3958 
3959 /* Change these macros so they can be used in a void function */
3960 #undef CHKERRQ
3961 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
3962 #undef SETERRQ2
3963 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
3964 #undef SETERRQ
3965 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
3966 
3967 EXTERN_C_BEGIN
3968 #undef __FUNCT__
3969 #define __FUNCT__ "matsetvaluesmpiaij_"
3970 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv)
3971 {
3972   Mat            mat = *mmat;
3973   PetscInt       m = *mm, n = *mn;
3974   InsertMode     addv = *maddv;
3975   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3976   PetscScalar    value;
3977   PetscErrorCode ierr;
3978 
3979   MatPreallocated(mat);
3980   if (mat->insertmode == NOT_SET_VALUES) {
3981     mat->insertmode = addv;
3982   }
3983 #if defined(PETSC_USE_DEBUG)
3984   else if (mat->insertmode != addv) {
3985     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3986   }
3987 #endif
3988   {
3989   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
3990   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
3991   PetscTruth     roworiented = aij->roworiented;
3992 
3993   /* Some Variables required in the macro */
3994   Mat            A = aij->A;
3995   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
3996   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
3997   PetscScalar    *aa = a->a;
3998   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
3999   Mat            B = aij->B;
4000   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4001   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4002   PetscScalar    *ba = b->a;
4003 
4004   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4005   PetscInt       nonew = a->nonew;
4006   PetscScalar    *ap1,*ap2;
4007 
4008   PetscFunctionBegin;
4009   for (i=0; i<m; i++) {
4010     if (im[i] < 0) continue;
4011 #if defined(PETSC_USE_DEBUG)
4012     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4013 #endif
4014     if (im[i] >= rstart && im[i] < rend) {
4015       row      = im[i] - rstart;
4016       lastcol1 = -1;
4017       rp1      = aj + ai[row];
4018       ap1      = aa + ai[row];
4019       rmax1    = aimax[row];
4020       nrow1    = ailen[row];
4021       low1     = 0;
4022       high1    = nrow1;
4023       lastcol2 = -1;
4024       rp2      = bj + bi[row];
4025       ap2      = ba + bi[row];
4026       rmax2    = bimax[row];
4027       nrow2    = bilen[row];
4028       low2     = 0;
4029       high2    = nrow2;
4030 
4031       for (j=0; j<n; j++) {
4032         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4033         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4034         if (in[j] >= cstart && in[j] < cend){
4035           col = in[j] - cstart;
4036           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4037         } else if (in[j] < 0) continue;
4038 #if defined(PETSC_USE_DEBUG)
4039         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4040 #endif
4041         else {
4042           if (mat->was_assembled) {
4043             if (!aij->colmap) {
4044               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4045             }
4046 #if defined (PETSC_USE_CTABLE)
4047             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4048 	    col--;
4049 #else
4050             col = aij->colmap[in[j]] - 1;
4051 #endif
4052             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4053               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4054               col =  in[j];
4055               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4056               B = aij->B;
4057               b = (Mat_SeqAIJ*)B->data;
4058               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4059               rp2      = bj + bi[row];
4060               ap2      = ba + bi[row];
4061               rmax2    = bimax[row];
4062               nrow2    = bilen[row];
4063               low2     = 0;
4064               high2    = nrow2;
4065               bm       = aij->B->rmap.n;
4066               ba = b->a;
4067             }
4068           } else col = in[j];
4069           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4070         }
4071       }
4072     } else {
4073       if (!aij->donotstash) {
4074         if (roworiented) {
4075           if (ignorezeroentries && v[i*n] == 0.0) continue;
4076           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4077         } else {
4078           if (ignorezeroentries && v[i] == 0.0) continue;
4079           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4080         }
4081       }
4082     }
4083   }}
4084   PetscFunctionReturnVoid();
4085 }
4086 EXTERN_C_END
4087