xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c123c2ff6c800bd7828b3e039f0971c7d193a696)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it it is not scalable (each processor
has an order N integer array but is fast to access).
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
#define CHUNKSIZE   15
/*
   MatSetValues_SeqAIJ_A_Private - inserts or adds one (row,col,value) entry
   into the "diagonal" sequential block a; col is a LOCAL column index.

   This is deliberately a macro (not a function) so that it shares the
   caller's working variables: rp1/ap1 point at the column-index/value arrays
   of the current row, nrow1 is the current row length, rmax1 the allocated
   row length, low1/high1 bracket the search window, lastcol1 remembers the
   previously accessed column (so ascending column accesses keep narrowing
   the window), and nonew/ignorezeroentries carry the matrix options.
   A binary search narrowed to <= 5 entries is finished with a linear scan;
   if the entry is absent it is inserted in sorted position, growing the row
   storage via MatSeqXAIJReallocateAIJ when necessary.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
      if (nonew == 1) goto a_noinsert; \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
70 
71 
/*
   MatSetValues_SeqAIJ_B_Private - identical twin of the _A_ macro above but
   operating on the off-diagonal sequential block b, using the caller's
   rp2/ap2/nrow2/rmax2/low2/high2/lastcol2 working variables.  See the
   _A_ variant for a description of the search/insert algorithm.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
       for (_i=low2; _i<high2; _i++) { \
        if (rp2[_i] > col) break; \
        if (rp2[_i] == col) { \
          if (addv == ADD_VALUES) ap2[_i] += value;   \
          else                    ap2[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
      if (nonew == 1) goto b_noinsert; \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
      N = nrow2++ - 1; b->nz++; high2++;\
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp2[ii+1] = rp2[ii]; \
        ap2[ii+1] = ap2[ii]; \
      } \
      rp2[_i] = col;  \
      ap2[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow2; \
}
104 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Overwrites the stored values of one (globally numbered) row.  v must list
   the values in global column order: first the off-diagonal entries whose
   global column precedes this process's ownership range, then the
   diagonal-block entries, then the remaining off-diagonal entries.
   The nonzero pattern is not changed.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  /* NOTE(review): diag is the row ownership start; using it as the column
     split point assumes rmap == cmap, i.e. a square matrix -- confirm */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;   /* convert to local row index */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    /* garray maps B's compressed local column numbers back to global ones */
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves the m x n block of values at global rows idxm[] and columns
   idxn[] into v (row-major).  Only locally owned rows may be requested.
   Entries not stored in the matrix are returned as 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
  PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          /* column in the diagonal block */
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: translate global to local via the colmap
             (built lazily on first use) */
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col --;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0 means the column never appears in B; the garray check
             guards against a stale colmap entry mapping to a different column */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   if (aij->donotstash) {
294     PetscFunctionReturn(0);
295   }
296 
297   /* make sure all processors are either in INSERTMODE or ADDMODE */
298   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
299   if (addv == (ADD_VALUES|INSERT_VALUES)) {
300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
301   }
302   mat->insertmode = addv; /* in case this processor had no cache */
303 
304   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
305   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
306   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes assembly: drains the stash (inserting received off-process
   values locally), assembles the diagonal block, synchronizes a possible
   disassembly across ranks, sets up the scatter for multiplies on first
   final assembly, and assembles the off-diagonal block.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    /* receive stashed values from other ranks until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use     = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    /* MPI_PROD of was_assembled: result is 0 (false) if ANY rank disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */
  b    = (Mat_SeqAIJ *)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  /* compressed row storage only for B -- presumably because B typically has
     many empty rows; confirm against the SeqAIJ assembly code */
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* row-extraction work buffer is stale after assembly */
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}
377 
378 #undef __FUNCT__
379 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
380 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
381 {
382   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
383   PetscErrorCode ierr;
384 
385   PetscFunctionBegin;
386   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
387   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
388   PetscFunctionReturn(0);
389 }
390 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeros the N globally numbered rows listed in rows[], optionally placing
   diag on the diagonal.  Each rank may list rows owned by any rank, so the
   routine first routes every requested row index to its owner with a
   point-to-point exchange, then zaps the locally owned rows.
   Collective on the matrix communicator.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  /* nprocs[2*j] = #rows destined for rank j; nprocs[2*j+1] = 1 if any */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    /* j persists across iterations so sorted input is located in O(1);
       reset the scan only when the indices go backwards */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* the pack loop above advanced starts[]; rebuild it before sending */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* lrows collects the local (zero-based) row numbers this rank must zero */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    /* square diagonal block: let the SeqAIJ routine set the diagonal in place */
    ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* rectangular diagonal block: must insert diagonal entries one by one */
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
542 
543 #undef __FUNCT__
544 #define __FUNCT__ "MatMult_MPIAIJ"
545 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
546 {
547   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
548   PetscErrorCode ierr;
549   PetscInt       nt;
550 
551   PetscFunctionBegin;
552   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
553   if (nt != A->cmap.n) {
554     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
555   }
556   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
558   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
559   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
560   PetscFunctionReturn(0);
561 }
562 
563 #undef __FUNCT__
564 #define __FUNCT__ "MatMultAdd_MPIAIJ"
565 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
566 {
567   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
568   PetscErrorCode ierr;
569 
570   PetscFunctionBegin;
571   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
572   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
573   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
574   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
575   PetscFunctionReturn(0);
576 }
577 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
/*
   yy = A^T*xx.  The off-diagonal contribution B^T*xx is computed into the
   ghost vector and scattered back (added) to the owning processes; the
   ordering of the local multiply relative to the scatter depends on whether
   the scatter's begin/end are merged into a single communication step.
*/
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
608 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
/*
   Tests whether Bmat == Amat^T to within tol, setting *f accordingly.
   First a cheap test of the diagonal blocks; only if those pass is the
   expensive off-diagonal comparison done via MatGetSubMatrices.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  /* notme = all global indices outside this rank's ownership range.
     NOTE(review): the second loop runs to M while the array is sized with N
     (N-last+first entries) -- consistent only when M == N; confirm square
     matrices are the intended/only use here. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
653 
654 #undef __FUNCT__
655 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
656 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
657 {
658   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
659   PetscErrorCode ierr;
660 
661   PetscFunctionBegin;
662   /* do nondiagonal part */
663   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
664   /* send it on its way */
665   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
666   /* do local part */
667   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
668   /* receive remote parts */
669   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
670   PetscFunctionReturn(0);
671 }
672 
673 /*
674   This only works correctly for square matrices where the subblock A->A is the
675    diagonal block
676 */
677 #undef __FUNCT__
678 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
679 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
680 {
681   PetscErrorCode ierr;
682   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
683 
684   PetscFunctionBegin;
685   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
686   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
687     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
688   }
689   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
690   PetscFunctionReturn(0);
691 }
692 
693 #undef __FUNCT__
694 #define __FUNCT__ "MatScale_MPIAIJ"
695 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
696 {
697   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
698   PetscErrorCode ierr;
699 
700   PetscFunctionBegin;
701   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
702   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
703   PetscFunctionReturn(0);
704 }
705 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/*
   Releases everything owned by an MPIAIJ matrix: the stash, both sequential
   blocks, the column map, the ghost arrays/scatter, and the composed
   type-specific functions registered on the object.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(aij);CHKERRQ(ierr);

  /* unregister the type-specific methods composed on this object */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatView_MPIAIJ_Binary"
743 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
744 {
745   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
746   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
747   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
750   int               fd;
751   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
752   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
753   PetscScalar       *column_values;
754 
755   PetscFunctionBegin;
756   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
757   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
758   nz   = A->nz + B->nz;
759   if (!rank) {
760     header[0] = MAT_FILE_COOKIE;
761     header[1] = mat->rmap.N;
762     header[2] = mat->cmap.N;
763     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
764     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
765     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
766     /* get largest number of rows any processor has */
767     rlen = mat->rmap.n;
768     range = mat->rmap.range;
769     for (i=1; i<size; i++) {
770       rlen = PetscMax(rlen,range[i+1] - range[i]);
771     }
772   } else {
773     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
774     rlen = mat->rmap.n;
775   }
776 
777   /* load up the local row counts */
778   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
779   for (i=0; i<mat->rmap.n; i++) {
780     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
781   }
782 
783   /* store the row lengths to the file */
784   if (!rank) {
785     MPI_Status status;
786     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
787     for (i=1; i<size; i++) {
788       rlen = range[i+1] - range[i];
789       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
790       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
791     }
792   } else {
793     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
794   }
795   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
796 
797   /* load up the local column indices */
798   nzmax = nz; /* )th processor needs space a largest processor needs */
799   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
800   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
801   cnt  = 0;
802   for (i=0; i<mat->rmap.n; i++) {
803     for (j=B->i[i]; j<B->i[i+1]; j++) {
804       if ( (col = garray[B->j[j]]) > cstart) break;
805       column_indices[cnt++] = col;
806     }
807     for (k=A->i[i]; k<A->i[i+1]; k++) {
808       column_indices[cnt++] = A->j[k] + cstart;
809     }
810     for (; j<B->i[i+1]; j++) {
811       column_indices[cnt++] = garray[B->j[j]];
812     }
813   }
814   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
815 
816   /* store the column indices to the file */
817   if (!rank) {
818     MPI_Status status;
819     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
820     for (i=1; i<size; i++) {
821       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
823       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
824       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
825     }
826   } else {
827     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
829   }
830   ierr = PetscFree(column_indices);CHKERRQ(ierr);
831 
832   /* load up the local column values */
833   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
834   cnt  = 0;
835   for (i=0; i<mat->rmap.n; i++) {
836     for (j=B->i[i]; j<B->i[i+1]; j++) {
837       if ( garray[B->j[j]] > cstart) break;
838       column_values[cnt++] = B->a[j];
839     }
840     for (k=A->i[i]; k<A->i[i+1]; k++) {
841       column_values[cnt++] = A->a[k];
842     }
843     for (; j<B->i[i+1]; j++) {
844       column_values[cnt++] = B->a[j];
845     }
846   }
847   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
848 
849   /* store the column values to the file */
850   if (!rank) {
851     MPI_Status status;
852     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
853     for (i=1; i<size; i++) {
854       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
856       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
857       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
858     }
859   } else {
860     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
861     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
862   }
863   ierr = PetscFree(column_values);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - Views a parallel AIJ matrix with an
  ASCII, draw, binary, or socket viewer.

  For the ASCII INFO formats only summary statistics are printed.  For binary
  viewers on more than one process the work is delegated to
  MatView_MPIAIJ_Binary().  For the remaining cases the whole matrix is
  assembled onto process 0 into a temporary matrix, which is then viewed
  through the singleton viewer.
*/
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process nonzero/memory statistics, then the scatter context */
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      /* NOTE(review): inodes (a PetscTruth) receives a PetscInt* here and is
         used only as a null/non-null flag -- confirm the cast is intentional */
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      /* brief I-node usage summary from process 0's diagonal block */
      PetscInt   inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for an unfactored parallel AIJ matrix */
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    /* a null draw context means nothing should be drawn */
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    /* process 0 holds all rows of the temporary matrix; others hold none */
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = mat->rmap.rstart;
    /* temporarily shift the local column indices to global numbering ... */
    for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    /* ... and shift them back so the original matrix is unchanged */
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap.rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;   /* keep the base pointer; cols is advanced in the loop below */
    /* translate compressed off-diagonal column indices to global numbering */
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatView_MPIAIJ"
996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997 {
998   PetscErrorCode ierr;
999   PetscTruth     iascii,isdraw,issocket,isbinary;
1000 
1001   PetscFunctionBegin;
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1003   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1005   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1006   if (iascii || isdraw || isbinary || issocket) {
1007     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1008   } else {
1009     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 
1015 
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
/*
  MatRelax_MPIAIJ - Block Jacobi/SOR relaxation for a parallel AIJ matrix.

  Only the SOR_LOCAL_* variants are supported: each outer iteration scatters
  the current solution's ghost values, forms bb1 = bb - B*x (the right-hand
  side with the off-processor coupling moved to the right), and then runs the
  requested sequential sweep on the local diagonal block A.  If
  SOR_ZERO_INITIAL_GUESS is set the first sweep skips the scatter since x = 0.
*/
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;   /* work vector holding bb - B*x */

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* x = 0, so no ghost-value update is needed for the first sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* NOTE(review): PETSC_NULL (0) is passed in the lits slot here, unlike
         the symmetric branch which passes lits -- confirm this is intended */
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatPermute_MPIAIJ"
1091 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1092 {
1093   MPI_Comm       comm,pcomm;
1094   PetscInt       first,local_size,nrows,*rows;
1095   int            ntids;
1096   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1101   /* make a collective version of 'rowp' */
1102   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1103   if (pcomm==comm) {
1104     crowp = rowp;
1105   } else {
1106     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1107     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1108     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1109     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1110   }
1111   /* collect the global row permutation and invert it */
1112   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1113   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1114   if (pcomm!=comm) {
1115     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1116   }
1117   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1118   /* get the local target indices */
1119   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1120   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1121   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1122   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1123   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1124   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1125   /* the column permutation is so much easier;
1126      make a local version of 'colp' and invert it */
1127   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1128   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1129   if (ntids==1) {
1130     lcolp = colp;
1131   } else {
1132     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1133     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1134     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1135   }
1136   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1137   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1138   if (ntids>1) {
1139     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1140     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1141   }
1142   /* now we just get the submatrix */
1143   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1144   /* clean up */
1145   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1146   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1152 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1153 {
1154   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1155   Mat            A = mat->A,B = mat->B;
1156   PetscErrorCode ierr;
1157   PetscReal      isend[5],irecv[5];
1158 
1159   PetscFunctionBegin;
1160   info->block_size     = 1.0;
1161   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1162   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1163   isend[3] = info->memory;  isend[4] = info->mallocs;
1164   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1165   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1166   isend[3] += info->memory;  isend[4] += info->mallocs;
1167   if (flag == MAT_LOCAL) {
1168     info->nz_used      = isend[0];
1169     info->nz_allocated = isend[1];
1170     info->nz_unneeded  = isend[2];
1171     info->memory       = isend[3];
1172     info->mallocs      = isend[4];
1173   } else if (flag == MAT_GLOBAL_MAX) {
1174     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1175     info->nz_used      = irecv[0];
1176     info->nz_allocated = irecv[1];
1177     info->nz_unneeded  = irecv[2];
1178     info->memory       = irecv[3];
1179     info->mallocs      = irecv[4];
1180   } else if (flag == MAT_GLOBAL_SUM) {
1181     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1182     info->nz_used      = irecv[0];
1183     info->nz_allocated = irecv[1];
1184     info->nz_unneeded  = irecv[2];
1185     info->memory       = irecv[3];
1186     info->mallocs      = irecv[4];
1187   }
1188   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1189   info->fill_ratio_needed = 0;
1190   info->factor_mallocs    = 0;
1191   info->rows_global       = (double)matin->rmap.N;
1192   info->columns_global    = (double)matin->cmap.N;
1193   info->rows_local        = (double)matin->rmap.n;
1194   info->columns_local     = (double)matin->cmap.N;
1195 
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatSetOption_MPIAIJ"
1201 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   switch (op) {
1208   case MAT_NO_NEW_NONZERO_LOCATIONS:
1209   case MAT_YES_NEW_NONZERO_LOCATIONS:
1210   case MAT_COLUMNS_UNSORTED:
1211   case MAT_COLUMNS_SORTED:
1212   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1213   case MAT_KEEP_ZEROED_ROWS:
1214   case MAT_NEW_NONZERO_LOCATION_ERR:
1215   case MAT_USE_INODES:
1216   case MAT_DO_NOT_USE_INODES:
1217   case MAT_IGNORE_ZERO_ENTRIES:
1218     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1219     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1220     break;
1221   case MAT_ROW_ORIENTED:
1222     a->roworiented = PETSC_TRUE;
1223     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1224     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1225     break;
1226   case MAT_ROWS_SORTED:
1227   case MAT_ROWS_UNSORTED:
1228   case MAT_YES_NEW_DIAGONALS:
1229     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1230     break;
1231   case MAT_COLUMN_ORIENTED:
1232     a->roworiented = PETSC_FALSE;
1233     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1234     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1235     break;
1236   case MAT_IGNORE_OFF_PROC_ENTRIES:
1237     a->donotstash = PETSC_TRUE;
1238     break;
1239   case MAT_NO_NEW_DIAGONALS:
1240     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1241   case MAT_SYMMETRIC:
1242   case MAT_STRUCTURALLY_SYMMETRIC:
1243   case MAT_HERMITIAN:
1244   case MAT_SYMMETRY_ETERNAL:
1245     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1246     break;
1247   case MAT_NOT_SYMMETRIC:
1248   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1249   case MAT_NOT_HERMITIAN:
1250   case MAT_NOT_SYMMETRY_ETERNAL:
1251     break;
1252   default:
1253     SETERRQ(PETSC_ERR_SUP,"unknown option");
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
  MatGetRow_MPIAIJ - Returns the column indices and values of one locally
  owned row in global numbering and increasing column order.

  The row is obtained from the diagonal (A) and off-diagonal (B) sequential
  blocks and merged: off-diagonal entries with global column < cstart come
  first, then the diagonal-block entries, then the remaining off-diagonal
  entries.  The merged output is stored in per-matrix scratch arrays
  (mat->rowvalues / mat->rowindices) sized for the longest local row; only
  one row may be "gotten" at a time (see getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt     max = 1,tmp;
    for (i=0; i<matin->rmap.n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds both the value and the index scratch arrays */
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;   /* compressed local -> global column map for B */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;   /* number of B entries preceding the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* the split point was already found while copying the values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* hand the block rows back immediately; the merged copy is what we return */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1344 
1345   PetscFunctionBegin;
1346   if (!aij->getrowactive) {
1347     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1348   }
1349   aij->getrowactive = PETSC_FALSE;
1350   PetscFunctionReturn(0);
1351 }
1352 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
  MatNorm_MPIAIJ - Computes the Frobenius, 1-, or infinity-norm of a parallel
  AIJ matrix.

  On one process the work is delegated to the sequential diagonal block.
  Otherwise each process accumulates over its diagonal (amat) and
  off-diagonal (bmat) blocks and the result is combined with an
  MPI_Allreduce; the 1-norm (max column sum) needs a length-N work array per
  process, so it is not scalable in memory.  The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap.rstart;
  PetscReal      sum = 0.0;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both local blocks, reduced, then sqrt */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      PetscInt    *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      /* diagonal block: local column index + cstart gives the global column */
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
      }
      /* off-diagonal block: garray maps compressed to global columns */
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap.n; j++) {
        /* row sum combines the diagonal and off-diagonal block entries */
        v = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
1429 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
  MatTranspose_MPIAIJ - Forms the transpose of a parallel AIJ matrix.

  A new matrix B with swapped row/column layout is created and the local
  entries are inserted with MatSetValues() using the transposed index pair
  (column block of B <- row of A).  If matout is NULL the transpose replaces
  A in place (square matrices only) via MatHeaderCopy().
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
  PetscErrorCode ierr;
  PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
  Mat            B;
  PetscScalar    *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  /* B has the transposed layout: local column count becomes local row count */
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = A->rmap.rstart;
  /* temporarily shift local column indices to global numbering ... */
  for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
  for (i=0; i<m; i++) {
    /* note the swapped (rows,cols) arguments: this inserts the transpose */
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  /* ... and undo the shift so A is left unchanged */
  aj = Aloc->j;
  for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = A->rmap.rstart;
  ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ct   = cols;   /* keep the base pointer; cols is advanced in the loop */
  /* translate compressed off-diagonal column indices to global numbering */
  for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    /* in-place: replace A's contents with B and destroy the shell of B */
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1487 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1488 {
1489   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1490   Mat            a = aij->A,b = aij->B;
1491   PetscErrorCode ierr;
1492   PetscInt       s1,s2,s3;
1493 
1494   PetscFunctionBegin;
1495   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1496   if (rr) {
1497     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1498     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1499     /* Overlap communication with computation. */
1500     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1501   }
1502   if (ll) {
1503     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1504     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1505     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1506   }
1507   /* scale  the diagonal block */
1508   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1509 
1510   if (rr) {
1511     /* Do a scatter end and then right scale the off-diagonal block */
1512     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1513     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1514   }
1515 
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1522 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1523 {
1524   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   if (!a->rank) {
1529     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1530   }
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1536 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1537 {
1538   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1539   PetscErrorCode ierr;
1540 
1541   PetscFunctionBegin;
1542   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1543   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1544   PetscFunctionReturn(0);
1545 }
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1548 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1549 {
1550   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatEqual_MPIAIJ"
1560 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1561 {
1562   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1563   Mat            a,b,c,d;
1564   PetscTruth     flg;
1565   PetscErrorCode ierr;
1566 
1567   PetscFunctionBegin;
1568   a = matA->A; b = matA->B;
1569   c = matB->A; d = matB->B;
1570 
1571   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1572   if (flg) {
1573     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1574   }
1575   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatCopy_MPIAIJ"
1581 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1582 {
1583   PetscErrorCode ierr;
1584   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1585   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1586 
1587   PetscFunctionBegin;
1588   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1589   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1590     /* because of the column compression in the off-processor part of the matrix a->B,
1591        the number of columns in a->B and b->B may be different, hence we cannot call
1592        the MatCopy() directly on the two parts. If need be, we can provide a more
1593        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1594        then copying the submatrices */
1595     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1596   } else {
1597     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1598     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1599   }
1600   PetscFunctionReturn(0);
1601 }
1602 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
/*
   MatSetUpPreallocation_MPIAIJ - Default preallocation: defers to
   MatMPIAIJSetPreallocation() with PETSC_DEFAULT per-row counts and no
   per-row length arrays.
*/
PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1613 
1614 #include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/*
   MatAXPY_MPIAIJ - Computes Y = a*X + Y.

   str selects the strategy:
.  SAME_NONZERO_PATTERN   - the value arrays line up entry-for-entry, so a raw
                            BLAS axpy is applied to the diagonal and
                            off-diagonal blocks
.  SUBSET_NONZERO_PATTERN - an index map y->xtoy (from X's entries into Y's
                            value array) is built once and cached on Y's
                            off-diagonal block, then used to accumulate
.  otherwise              - falls back to the general MatAXPY_Basic()
*/
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* axpy the diagonal-block values ... */
    x = (Mat_SeqAIJ *)xx->A->data;
    y = (Mat_SeqAIJ *)yy->A->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
    /* ... then the off-diagonal-block values */
    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    bnz = (PetscBLASInt)x->nz;
    BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ *)xx->B->data;
    y = (Mat_SeqAIJ *)yy->B->data;
    /* the cached map was built against a different X: discard it */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      /* NOTE(review): xx->B is cached in y->XtoY with no visible
         PetscObjectReference() here, yet it is MatDestroy()ed above when
         replaced -- confirm where the reference count is bumped */
      y->XtoY = xx->B;
    }
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1655 
1656 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1657 
#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
/*
   MatConjugate_MPIAIJ - Conjugates the entries of both sequential blocks.
   In a real-scalar build this is compiled down to a no-op.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatRealPart_MPIAIJ"
1677 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1678 {
1679   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1680   PetscErrorCode ierr;
1681 
1682   PetscFunctionBegin;
1683   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1684   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1690 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1691 {
1692   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1693   PetscErrorCode ierr;
1694 
1695   PetscFunctionBegin;
1696   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1697   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 /* -------------------------------------------------------------------*/
1702 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1703        MatGetRow_MPIAIJ,
1704        MatRestoreRow_MPIAIJ,
1705        MatMult_MPIAIJ,
1706 /* 4*/ MatMultAdd_MPIAIJ,
1707        MatMultTranspose_MPIAIJ,
1708        MatMultTransposeAdd_MPIAIJ,
1709        0,
1710        0,
1711        0,
1712 /*10*/ 0,
1713        0,
1714        0,
1715        MatRelax_MPIAIJ,
1716        MatTranspose_MPIAIJ,
1717 /*15*/ MatGetInfo_MPIAIJ,
1718        MatEqual_MPIAIJ,
1719        MatGetDiagonal_MPIAIJ,
1720        MatDiagonalScale_MPIAIJ,
1721        MatNorm_MPIAIJ,
1722 /*20*/ MatAssemblyBegin_MPIAIJ,
1723        MatAssemblyEnd_MPIAIJ,
1724        0,
1725        MatSetOption_MPIAIJ,
1726        MatZeroEntries_MPIAIJ,
1727 /*25*/ MatZeroRows_MPIAIJ,
1728        0,
1729        0,
1730        0,
1731        0,
1732 /*30*/ MatSetUpPreallocation_MPIAIJ,
1733        0,
1734        0,
1735        0,
1736        0,
1737 /*35*/ MatDuplicate_MPIAIJ,
1738        0,
1739        0,
1740        0,
1741        0,
1742 /*40*/ MatAXPY_MPIAIJ,
1743        MatGetSubMatrices_MPIAIJ,
1744        MatIncreaseOverlap_MPIAIJ,
1745        MatGetValues_MPIAIJ,
1746        MatCopy_MPIAIJ,
1747 /*45*/ MatPrintHelp_MPIAIJ,
1748        MatScale_MPIAIJ,
1749        0,
1750        0,
1751        0,
1752 /*50*/ MatSetBlockSize_MPIAIJ,
1753        0,
1754        0,
1755        0,
1756        0,
1757 /*55*/ MatFDColoringCreate_MPIAIJ,
1758        0,
1759        MatSetUnfactored_MPIAIJ,
1760        MatPermute_MPIAIJ,
1761        0,
1762 /*60*/ MatGetSubMatrix_MPIAIJ,
1763        MatDestroy_MPIAIJ,
1764        MatView_MPIAIJ,
1765        0,
1766        0,
1767 /*65*/ 0,
1768        0,
1769        0,
1770        0,
1771        0,
1772 /*70*/ 0,
1773        0,
1774        MatSetColoring_MPIAIJ,
1775 #if defined(PETSC_HAVE_ADIC)
1776        MatSetValuesAdic_MPIAIJ,
1777 #else
1778        0,
1779 #endif
1780        MatSetValuesAdifor_MPIAIJ,
1781 /*75*/ 0,
1782        0,
1783        0,
1784        0,
1785        0,
1786 /*80*/ 0,
1787        0,
1788        0,
1789        0,
1790 /*84*/ MatLoad_MPIAIJ,
1791        0,
1792        0,
1793        0,
1794        0,
1795        0,
1796 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1797        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1798        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1799        MatPtAP_Basic,
1800        MatPtAPSymbolic_MPIAIJ,
1801 /*95*/ MatPtAPNumeric_MPIAIJ,
1802        0,
1803        0,
1804        0,
1805        0,
1806 /*100*/0,
1807        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1808        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1809        MatConjugate_MPIAIJ,
1810        0,
1811 /*105*/MatSetValuesRow_MPIAIJ,
1812        MatRealPart_MPIAIJ,
1813        MatImaginaryPart_MPIAIJ};
1814 
1815 /* ----------------------------------------------------------------------------------------*/
1816 
1817 EXTERN_C_BEGIN
1818 #undef __FUNCT__
1819 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1820 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1821 {
1822   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1823   PetscErrorCode ierr;
1824 
1825   PetscFunctionBegin;
1826   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1827   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1828   PetscFunctionReturn(0);
1829 }
1830 EXTERN_C_END
1831 
1832 EXTERN_C_BEGIN
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1835 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1836 {
1837   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1838   PetscErrorCode ierr;
1839 
1840   PetscFunctionBegin;
1841   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1842   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 EXTERN_C_END
1846 
1847 #include "petscpc.h"
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
/*
   MatMPIAIJSetPreallocation_MPIAIJ - Type-specific preallocation: validates the
   requested per-row nonzero counts, sets up the row/column maps, creates the
   two sequential blocks (diagonal b->A of size n x n, off-diagonal b->B of
   size n x N) and preallocates them.

   d_nz/d_nnz - nonzeros per row in the diagonal block (scalar or per-row array)
   o_nz/o_nnz - nonzeros per row in the off-diagonal block
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  B->preallocated = PETSC_TRUE;
  /* default scalar counts when the caller passed PETSC_DEFAULT/PETSC_DECIDE */
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);

  /* the maps must be initialized before B->rmap.n can be used below */
  B->rmap.bs = B->cmap.bs = 1;
  ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
  if (d_nnz) {
    for (i=0; i<B->rmap.n; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap.n; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
    }
  }
  b = (Mat_MPIAIJ*)B->data;

  /* Explicitly create 2 MATSEQAIJ matrices. */
  ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
  ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
  ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
  /* b->B has the full global column width N; its columns are compressed later */
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);

  ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1898 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1899 {
1900   Mat            mat;
1901   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   *newmat       = 0;
1906   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1907   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
1908   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1909   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1910   a    = (Mat_MPIAIJ*)mat->data;
1911 
1912   mat->factor       = matin->factor;
1913   mat->rmap.bs      = matin->rmap.bs;
1914   mat->assembled    = PETSC_TRUE;
1915   mat->insertmode   = NOT_SET_VALUES;
1916   mat->preallocated = PETSC_TRUE;
1917 
1918   a->size           = oldmat->size;
1919   a->rank           = oldmat->rank;
1920   a->donotstash     = oldmat->donotstash;
1921   a->roworiented    = oldmat->roworiented;
1922   a->rowindices     = 0;
1923   a->rowvalues      = 0;
1924   a->getrowactive   = PETSC_FALSE;
1925 
1926   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
1927   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
1928 
1929   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1930   if (oldmat->colmap) {
1931 #if defined (PETSC_USE_CTABLE)
1932     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1933 #else
1934     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1935     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1936     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1937 #endif
1938   } else a->colmap = 0;
1939   if (oldmat->garray) {
1940     PetscInt len;
1941     len  = oldmat->B->cmap.n;
1942     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1943     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1944     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1945   } else a->garray = 0;
1946 
1947   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1948   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1949   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1950   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1951   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1952   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1953   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1954   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1955   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1956   *newmat = mat;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #include "petscsys.h"
1961 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   MatLoad_MPIAIJ - Loads a matrix of the given type from a PETSc binary viewer.

   Process 0 reads the header, row lengths, column indices and values from the
   file and ships each process its piece with point-to-point messages; every
   process then preallocates and fills its local rows.  Binary format (all
   read by process 0 only): cookie, M, N, total-nnz header, then the M row
   lengths, then all column indices, then all values.
*/
PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
{
  Mat            A;
  PetscScalar    *vals,*svals;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
  PetscInt       i,nz,j,rstart,rend,mmax;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners;
  int            fd;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];
  /* determine ownership of all rows: split M rows nearly evenly, the first
     (M mod size) processes getting one extra row */
  m    = M/size + ((M % size) > rank);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax       = rowners[1];
    for (i=2; i<size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
  } else mmax = m;

  /* turn rowners into a prefix sum so rowners[p]..rowners[p+1] is process p's
     row range; mmax picks up the last raw count before it is overwritten */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    mmax       = PetscMax(mmax,rowners[i]);
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    n      = N/size + ((N % size) > rank);
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* create our matrix: ourlens is temporarily reduced to diagonal-block
     lengths for preallocation, then restored to full row lengths below */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);

  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
2157 
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatGetSubMatrices_MPIAIJ()

    csize - number of local columns requested, or PETSC_DECIDE
    call  - MAT_INITIAL_MATRIX creates the result (and caches the sequential
            submatrix on it under the key "SubMatrix" for later reuse);
            MAT_REUSE_MATRIX refills a matrix previously produced by this
            routine with MAT_INITIAL_MATRIX
*/
PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            *local,M,Mreuse;
  PetscScalar    *vwork,*aa;
  MPI_Comm       comm = mat->comm;
  Mat_SeqAIJ     *aij;


  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* retrieve the cached sequential submatrix and refill it in place */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    local = &Mreuse;
    ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
  } else {
    ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
    Mreuse = *local;
    ierr   = PetscFree(local);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) {
      SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
    }

    /* next, compute all the lengths */
    ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  /* walk the sequential submatrix row by row and insert into the result */
  aij = (Mat_SeqAIJ*)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
2284 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
/*
   MatMPIAIJSetPreallocationCSR_MPIAIJ - Preallocates and fills the matrix from
   local CSR arrays: I (row offsets, I[0] must be 0), J (column indices, which
   must be sorted within each row -- see the diagonal/off-diagonal split below),
   and optional values v.

   NOTE(review): the parameter name `I` collides with the <complex.h> macro `I`
   if that header is ever in scope here -- confirm, and consider renaming.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
{
  PetscInt       m,cstart, cend,j,nnz,i,d;
  PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
  const PetscInt *JJ;
  PetscScalar    *values;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]);

  B->rmap.bs = B->cmap.bs = 1;
  ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
  m      = B->rmap.n;
  cstart = B->cmap.rstart;
  cend   = B->cmap.rend;
  rstart = B->rmap.rstart;

  /* one allocation for both count arrays; o_nnz aliases the second half */
  ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
  o_nnz = d_nnz + m;

  for (i=0; i<m; i++) {
    nnz     = I[i+1]- I[i];
    JJ      = J + I[i];
    nnz_max = PetscMax(nnz_max,nnz);
    if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
    /* row is sorted: skip columns left of the diagonal block ... */
    for (j=0; j<nnz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    /* ... then count consecutive columns inside [cstart,cend) */
    d = 0;
    for (; j<nnz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree(d_nnz);CHKERRQ(ierr);

  /* without caller values, insert zeros from a scratch buffer */
  if (v) values = (PetscScalar*)v;
  else {
    ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ii   = i + rstart;
    nnz  = I[i+1]- I[i];
    ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);

  if (!v) {
    ierr = PetscFree(values);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2355 /*@
2356    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2357    (the default parallel PETSc format).
2358 
2359    Collective on MPI_Comm
2360 
2361    Input Parameters:
2362 +  B - the matrix
2363 .  i - the indices into j for the start of each local row (starts with zero)
2364 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2365 -  v - optional values in the matrix
2366 
2367    Level: developer
2368 
2369 .keywords: matrix, aij, compressed row, sparse, parallel
2370 
2371 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2372 @*/
2373 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2374 {
2375   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2376 
2377   PetscFunctionBegin;
2378   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2379   if (f) {
2380     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2381   }
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2387 /*@C
2388    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2389    (the default parallel PETSc format).  For good matrix assembly performance
2390    the user should preallocate the matrix storage by setting the parameters
2391    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2392    performance can be increased by more than a factor of 50.
2393 
2394    Collective on MPI_Comm
2395 
2396    Input Parameters:
2397 +  A - the matrix
2398 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2399            (same value is used for all local rows)
2400 .  d_nnz - array containing the number of nonzeros in the various rows of the
2401            DIAGONAL portion of the local submatrix (possibly different for each row)
2402            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2403            The size of this array is equal to the number of local rows, i.e 'm'.
2404            You must leave room for the diagonal entry even if it is zero.
2405 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2406            submatrix (same value is used for all local rows).
2407 -  o_nnz - array containing the number of nonzeros in the various rows of the
2408            OFF-DIAGONAL portion of the local submatrix (possibly different for
2409            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2410            structure. The size of this array is equal to the number
2411            of local rows, i.e 'm'.
2412 
2413    If the *_nnz parameter is given then the *_nz parameter is ignored
2414 
2415    The AIJ format (also called the Yale sparse matrix format or
2416    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2417    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2418 
2419    The parallel matrix is partitioned such that the first m0 rows belong to
2420    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2421    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2422 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2429 
2430    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2431 
2432    Example usage:
2433 
2434    Consider the following 8x8 matrix with 34 non-zero values, that is
2435    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2436    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2437    as follows:
2438 
2439 .vb
2440             1  2  0  |  0  3  0  |  0  4
2441     Proc0   0  5  6  |  7  0  0  |  8  0
2442             9  0 10  | 11  0  0  | 12  0
2443     -------------------------------------
2444            13  0 14  | 15 16 17  |  0  0
2445     Proc1   0 18  0  | 19 20 21  |  0  0
2446             0  0  0  | 22 23  0  | 24  0
2447     -------------------------------------
2448     Proc2  25 26 27  |  0  0 28  | 29  0
2449            30  0  0  | 31 32 33  |  0 34
2450 .ve
2451 
2452    This can be represented as a collection of submatrices as:
2453 
2454 .vb
2455       A B C
2456       D E F
2457       G H I
2458 .ve
2459 
2460    Where the submatrices A,B,C are owned by proc0, D,E,F are
2461    owned by proc1, G,H,I are owned by proc2.
2462 
2463    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2464    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2465    The 'M','N' parameters are 8,8, and have the same values on all procs.
2466 
2467    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2468    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2469    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2473 
2474    When d_nz, o_nz parameters are specified, d_nz storage elements are
2475    allocated for every row of the local diagonal submatrix, and o_nz
2476    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2479    In this case, the values of d_nz,o_nz are:
2480 .vb
2481      proc0 : dnz = 2, o_nz = 2
2482      proc1 : dnz = 3, o_nz = 2
2483      proc2 : dnz = 1, o_nz = 4
2484 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
   34 values.
2489 
   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2492    In the above case the values for d_nnz,o_nnz are:
2493 .vb
2494      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2495      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2496      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2497 .ve
2498    Here the space allocated is sum of all the above values i.e 34, and
2499    hence pre-allocation is perfect.
2500 
2501    Level: intermediate
2502 
2503 .keywords: matrix, aij, compressed row, sparse, parallel
2504 
2505 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2506           MPIAIJ
2507 @*/
2508 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2509 {
2510   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2511 
2512   PetscFunctionBegin;
2513   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2514   if (f) {
2515     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2516   }
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatCreateMPIAIJ"
2522 /*@C
2523    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2524    (the default parallel PETSc format).  For good matrix assembly performance
2525    the user should preallocate the matrix storage by setting the parameters
2526    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2527    performance can be increased by more than a factor of 50.
2528 
2529    Collective on MPI_Comm
2530 
2531    Input Parameters:
2532 +  comm - MPI communicator
2533 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2534            This value should be the same as the local size used in creating the
2535            y vector for the matrix-vector product y = Ax.
2536 .  n - This value should be the same as the local size used in creating the
2537        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2538        calculated if N is given) For square matrices n is almost always m.
2539 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2540 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2541 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2542            (same value is used for all local rows)
2543 .  d_nnz - array containing the number of nonzeros in the various rows of the
2544            DIAGONAL portion of the local submatrix (possibly different for each row)
2545            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2546            The size of this array is equal to the number of local rows, i.e 'm'.
2547            You must leave room for the diagonal entry even if it is zero.
2548 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2549            submatrix (same value is used for all local rows).
2550 -  o_nnz - array containing the number of nonzeros in the various rows of the
2551            OFF-DIAGONAL portion of the local submatrix (possibly different for
2552            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2553            structure. The size of this array is equal to the number
2554            of local rows, i.e 'm'.
2555 
2556    Output Parameter:
2557 .  A - the matrix
2558 
2559    Notes:
2560    If the *_nnz parameter is given then the *_nz parameter is ignored
2561 
2562    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2563    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2564    storage requirements for this matrix.
2565 
2566    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2567    processor than it must be used on all processors that share the object for
2568    that argument.
2569 
2570    The user MUST specify either the local or global matrix dimensions
2571    (possibly both).
2572 
2573    The parallel matrix is partitioned such that the first m0 rows belong to
2574    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2575    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2576 
   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding
   to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
   first row that belongs to the processor, and r2 is the last row belonging
   to this processor. This is a square mxm matrix. The remaining portion
   of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2583 
2584    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2585 
2586    When calling this routine with a single process communicator, a matrix of
2587    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2588    type of communicator, use the construction mechanism:
2589      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2590 
2591    By default, this format uses inodes (identical nodes) when possible.
2592    We search for consecutive rows with the same nonzero structure, thereby
2593    reusing matrix information to achieve increased efficiency.
2594 
2595    Options Database Keys:
2596 +  -mat_no_inode  - Do not use inodes
2597 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2598 -  -mat_aij_oneindex - Internally use indexing starting at 1
2599         rather than 0.  Note that when calling MatSetValues(),
2600         the user still MUST index entries starting at 0!
2601 
2602 
2603    Example usage:
2604 
2605    Consider the following 8x8 matrix with 34 non-zero values, that is
2606    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2607    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2608    as follows:
2609 
2610 .vb
2611             1  2  0  |  0  3  0  |  0  4
2612     Proc0   0  5  6  |  7  0  0  |  8  0
2613             9  0 10  | 11  0  0  | 12  0
2614     -------------------------------------
2615            13  0 14  | 15 16 17  |  0  0
2616     Proc1   0 18  0  | 19 20 21  |  0  0
2617             0  0  0  | 22 23  0  | 24  0
2618     -------------------------------------
2619     Proc2  25 26 27  |  0  0 28  | 29  0
2620            30  0  0  | 31 32 33  |  0 34
2621 .ve
2622 
2623    This can be represented as a collection of submatrices as:
2624 
2625 .vb
2626       A B C
2627       D E F
2628       G H I
2629 .ve
2630 
2631    Where the submatrices A,B,C are owned by proc0, D,E,F are
2632    owned by proc1, G,H,I are owned by proc2.
2633 
2634    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2635    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2636    The 'M','N' parameters are 8,8, and have the same values on all procs.
2637 
2638    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2639    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2640    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.
2644 
2645    When d_nz, o_nz parameters are specified, d_nz storage elements are
2646    allocated for every row of the local diagonal submatrix, and o_nz
2647    storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2650    In this case, the values of d_nz,o_nz are:
2651 .vb
2652      proc0 : dnz = 2, o_nz = 2
2653      proc1 : dnz = 3, o_nz = 2
2654      proc2 : dnz = 1, o_nz = 4
2655 .ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e. we are using 12+15+10=37 storage locations to store
   34 values.
2660 
   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2663    In the above case the values for d_nnz,o_nnz are:
2664 .vb
2665      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2666      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2667      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2668 .ve
2669    Here the space allocated is sum of all the above values i.e 34, and
2670    hence pre-allocation is perfect.
2671 
2672    Level: intermediate
2673 
2674 .keywords: matrix, aij, compressed row, sparse, parallel
2675 
2676 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2677           MPIAIJ
2678 @*/
2679 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2680 {
2681   PetscErrorCode ierr;
2682   PetscMPIInt    size;
2683 
2684   PetscFunctionBegin;
2685   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2686   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2687   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2688   if (size > 1) {
2689     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2690     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2691   } else {
2692     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2693     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2694   }
2695   PetscFunctionReturn(0);
2696 }
2697 
2698 #undef __FUNCT__
2699 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2700 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2701 {
2702   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2703 
2704   PetscFunctionBegin;
2705   *Ad     = a->A;
2706   *Ao     = a->B;
2707   *colmap = a->garray;
2708   PetscFunctionReturn(0);
2709 }
2710 
#undef __FUNCT__
#define __FUNCT__ "MatSetColoring_MPIAIJ"
/*
   MatSetColoring_MPIAIJ - Attaches a coloring to an MPIAIJ matrix by
   building and applying a coloring for each of its two sequential blocks
   (a->A, the diagonal part, and a->B, the off-diagonal part).

   For IS_COLORING_LOCAL the colors are indexed by global column number, so
   the off-diagonal block needs the colors of all processes (gathered with
   ISAllGatherColors) and a->garray to translate its local columns to global
   ones.  For IS_COLORING_GHOSTED the colors are indexed by local (ghosted)
   numbering, so global column numbers are first mapped through A->mapping.
*/
PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (coloring->ctype == IS_COLORING_LOCAL) {
    ISColoringValue *allcolors,*colors;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: its columns are the locally owned
       global columns, so the incoming coloring applies directly */
    ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: gather every process's colors,
       then pick out the ones for the global columns this block references */
    ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      /* garray[i] is the global column of local off-diagonal column i */
      colors[i] = allcolors[a->garray[i]];
    }
    ierr = PetscFree(allcolors);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else if (coloring->ctype == IS_COLORING_GHOSTED) {
    ISColoringValue *colors;
    PetscInt        *larray;
    ISColoring      ocoloring;

    /* set coloring for diagonal portion: translate each owned global column
       to its local (ghosted) index, then look up its color */
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      larray[i] = i + A->cmap.rstart;   /* global column number of local column i */
    }
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->A->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);

    /* set coloring for off-diagonal portion: same idea, starting from the
       global columns recorded in garray */
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
    ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
    ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
    for (i=0; i<a->B->cmap.n; i++) {
      colors[i] = coloring->colors[larray[i]];
    }
    ierr = PetscFree(larray);CHKERRQ(ierr);
    ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap.n,colors,&ocoloring);CHKERRQ(ierr);
    ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
    ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
  }

  PetscFunctionReturn(0);
}
2774 
#if defined(PETSC_HAVE_ADIC)
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
/* Forward ADIC-differentiated values to both sequential pieces of the
   parallel matrix: the diagonal block first, then the off-diagonal block. */
PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValuesAdic_SeqAIJ(aij->A,advalues);CHKERRQ(ierr); /* diagonal block */
  ierr = MatSetValuesAdic_SeqAIJ(aij->B,advalues);CHKERRQ(ierr); /* off-diagonal block */
  PetscFunctionReturn(0);
}
#endif
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2792 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2793 {
2794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2795   PetscErrorCode ierr;
2796 
2797   PetscFunctionBegin;
2798   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2799   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2800   PetscFunctionReturn(0);
2801 }
2802 
#undef __FUNCT__
#define __FUNCT__ "MatMerge"
/*@C
      MatMerge - Creates a single large PETSc matrix by concatenating the
                 sequential matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    inmat - the input sequential matrix (one per process); its rows become
             this process's rows of the parallel matrix
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    outmat - the parallel matrix generated

    Level: advanced

   Notes: The number of columns of the matrix in EACH processor MUST be the same.
          inmat is destroyed by this routine once its values have been copied.

@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX){
    /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
    if (n == PETSC_DECIDE){
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* prefix-sum of the local row counts gives this process's first global row */
    ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= m;

    /* NOTE: MatPreallocateInitialize/Finalize are macros that declare and
       free dnz/onz and open/close a scope; they must stay paired as-is */
    ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
    for (i=0;i<m;i++) {
      ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
    }
    /* This routine will ONLY return MPIAIJ type matrix */
    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);

  } else if (scall == MAT_REUSE_MATRIX){
    /* reuse path: the row layout is already fixed by the existing outmat */
    ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  }

  /* copy the local rows of inmat into the global rows [rstart,rstart+m) */
  for (i=0;i<m;i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    I    = i + rstart;
    ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatDestroy(inmat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
2873 
2874 #undef __FUNCT__
2875 #define __FUNCT__ "MatFileSplit"
2876 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2877 {
2878   PetscErrorCode    ierr;
2879   PetscMPIInt       rank;
2880   PetscInt          m,N,i,rstart,nnz;
2881   size_t            len;
2882   const PetscInt    *indx;
2883   PetscViewer       out;
2884   char              *name;
2885   Mat               B;
2886   const PetscScalar *values;
2887 
2888   PetscFunctionBegin;
2889   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2890   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2891   /* Should this be the type of the diagonal block of A? */
2892   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2893   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2894   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2895   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2896   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2897   for (i=0;i<m;i++) {
2898     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2899     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2900     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2901   }
2902   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2903   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2904 
2905   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2906   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2907   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2908   sprintf(name,"%s.%d",outfile,rank);
2909   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
2910   ierr = PetscFree(name);
2911   ierr = MatView(B,out);CHKERRQ(ierr);
2912   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2913   ierr = MatDestroy(B);CHKERRQ(ierr);
2914   PetscFunctionReturn(0);
2915 }
2916 
2917 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2918 #undef __FUNCT__
2919 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2920 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2921 {
2922   PetscErrorCode       ierr;
2923   Mat_Merge_SeqsToMPI  *merge;
2924   PetscObjectContainer container;
2925 
2926   PetscFunctionBegin;
2927   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2928   if (container) {
2929     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2930     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2931     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2932     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2933     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2934     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2935     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2936     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2937     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
2938     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
2939     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
2940     ierr = PetscFree(merge->rowmap.range);CHKERRQ(ierr);
2941 
2942     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2943     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2944   }
2945   ierr = PetscFree(merge);CHKERRQ(ierr);
2946 
2947   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2948   PetscFunctionReturn(0);
2949 }
2950 
2951 #include "src/mat/utils/freespace.h"
2952 #include "petscbt.h"
2953 static PetscEvent logkey_seqstompinum = 0;
2954 #undef __FUNCT__
2955 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2956 /*@C
2957       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
2958                  matrices from each processor
2959 
2960     Collective on MPI_Comm
2961 
2962    Input Parameters:
2963 +    comm - the communicators the parallel matrix will live on
2964 .    seqmat - the input sequential matrices
2965 .    m - number of local rows (or PETSC_DECIDE)
2966 .    n - number of local columns (or PETSC_DECIDE)
2967 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2968 
2969    Output Parameter:
2970 .    mpimat - the parallel matrix generated
2971 
2972     Level: advanced
2973 
2974    Notes:
2975      The dimensions of the sequential matrix in each processor MUST be the same.
2976      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
2977      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2978 @*/
2979 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2980 {
2981   PetscErrorCode       ierr;
2982   MPI_Comm             comm=mpimat->comm;
2983   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2984   PetscMPIInt          size,rank,taga,*len_s;
2985   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
2986   PetscInt             proc,m;
2987   PetscInt             **buf_ri,**buf_rj;
2988   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2989   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2990   MPI_Request          *s_waits,*r_waits;
2991   MPI_Status           *status;
2992   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2993   Mat_Merge_SeqsToMPI  *merge;
2994   PetscObjectContainer container;
2995 
2996   PetscFunctionBegin;
2997   if (!logkey_seqstompinum) {
2998     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
2999   }
3000   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3001 
3002   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3003   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3004 
3005   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3006   if (container) {
3007     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3008   }
3009   bi     = merge->bi;
3010   bj     = merge->bj;
3011   buf_ri = merge->buf_ri;
3012   buf_rj = merge->buf_rj;
3013 
3014   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3015   owners = merge->rowmap.range;
3016   len_s  = merge->len_s;
3017 
3018   /* send and recv matrix values */
3019   /*-----------------------------*/
3020   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3021   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3022 
3023   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3024   for (proc=0,k=0; proc<size; proc++){
3025     if (!len_s[proc]) continue;
3026     i = owners[proc];
3027     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3028     k++;
3029   }
3030 
3031   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3032   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3033   ierr = PetscFree(status);CHKERRQ(ierr);
3034 
3035   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3036   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3037 
3038   /* insert mat values of mpimat */
3039   /*----------------------------*/
3040   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3041   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3042   nextrow = buf_ri_k + merge->nrecv;
3043   nextai  = nextrow + merge->nrecv;
3044 
3045   for (k=0; k<merge->nrecv; k++){
3046     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3047     nrows = *(buf_ri_k[k]);
3048     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3049     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3050   }
3051 
3052   /* set values of ba */
3053   m = merge->rowmap.n;
3054   for (i=0; i<m; i++) {
3055     arow = owners[rank] + i;
3056     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3057     bnzi = bi[i+1] - bi[i];
3058     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3059 
3060     /* add local non-zero vals of this proc's seqmat into ba */
3061     anzi = ai[arow+1] - ai[arow];
3062     aj   = a->j + ai[arow];
3063     aa   = a->a + ai[arow];
3064     nextaj = 0;
3065     for (j=0; nextaj<anzi; j++){
3066       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3067         ba_i[j] += aa[nextaj++];
3068       }
3069     }
3070 
3071     /* add received vals into ba */
3072     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3073       /* i-th row */
3074       if (i == *nextrow[k]) {
3075         anzi = *(nextai[k]+1) - *nextai[k];
3076         aj   = buf_rj[k] + *(nextai[k]);
3077         aa   = abuf_r[k] + *(nextai[k]);
3078         nextaj = 0;
3079         for (j=0; nextaj<anzi; j++){
3080           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3081             ba_i[j] += aa[nextaj++];
3082           }
3083         }
3084         nextrow[k]++; nextai[k]++;
3085       }
3086     }
3087     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3088   }
3089   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3090   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3091 
3092   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3093   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3094   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3095   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3096   PetscFunctionReturn(0);
3097 }
3098 
3099 static PetscEvent logkey_seqstompisym = 0;
3100 #undef __FUNCT__
3101 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3102 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3103 {
3104   PetscErrorCode       ierr;
3105   Mat                  B_mpi;
3106   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3107   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3108   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3109   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3110   PetscInt             len,proc,*dnz,*onz;
3111   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3112   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3113   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3114   MPI_Status           *status;
3115   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3116   PetscBT              lnkbt;
3117   Mat_Merge_SeqsToMPI  *merge;
3118   PetscObjectContainer container;
3119 
3120   PetscFunctionBegin;
3121   if (!logkey_seqstompisym) {
3122     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3123   }
3124   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3125 
3126   /* make sure it is a PETSc comm */
3127   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3128   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3129   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3130 
3131   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3132   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3133 
3134   /* determine row ownership */
3135   /*---------------------------------------------------------*/
3136   merge->rowmap.n = m;
3137   merge->rowmap.N = M;
3138   merge->rowmap.bs = 1;
3139   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3140   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3141   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3142 
3143   m      = merge->rowmap.n;
3144   M      = merge->rowmap.N;
3145   owners = merge->rowmap.range;
3146 
3147   /* determine the number of messages to send, their lengths */
3148   /*---------------------------------------------------------*/
3149   len_s  = merge->len_s;
3150 
3151   len = 0;  /* length of buf_si[] */
3152   merge->nsend = 0;
3153   for (proc=0; proc<size; proc++){
3154     len_si[proc] = 0;
3155     if (proc == rank){
3156       len_s[proc] = 0;
3157     } else {
3158       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3159       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3160     }
3161     if (len_s[proc]) {
3162       merge->nsend++;
3163       nrows = 0;
3164       for (i=owners[proc]; i<owners[proc+1]; i++){
3165         if (ai[i+1] > ai[i]) nrows++;
3166       }
3167       len_si[proc] = 2*(nrows+1);
3168       len += len_si[proc];
3169     }
3170   }
3171 
3172   /* determine the number and length of messages to receive for ij-structure */
3173   /*-------------------------------------------------------------------------*/
3174   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3175   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3176 
3177   /* post the Irecv of j-structure */
3178   /*-------------------------------*/
3179   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
3180   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3181 
3182   /* post the Isend of j-structure */
3183   /*--------------------------------*/
3184   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3185   sj_waits = si_waits + merge->nsend;
3186 
3187   for (proc=0, k=0; proc<size; proc++){
3188     if (!len_s[proc]) continue;
3189     i = owners[proc];
3190     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3191     k++;
3192   }
3193 
3194   /* receives and sends of j-structure are complete */
3195   /*------------------------------------------------*/
3196   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3197   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3198 
3199   /* send and recv i-structure */
3200   /*---------------------------*/
3201   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
3202   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3203 
3204   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3205   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3206   for (proc=0,k=0; proc<size; proc++){
3207     if (!len_s[proc]) continue;
3208     /* form outgoing message for i-structure:
3209          buf_si[0]:                 nrows to be sent
3210                [1:nrows]:           row index (global)
3211                [nrows+1:2*nrows+1]: i-structure index
3212     */
3213     /*-------------------------------------------*/
3214     nrows = len_si[proc]/2 - 1;
3215     buf_si_i    = buf_si + nrows+1;
3216     buf_si[0]   = nrows;
3217     buf_si_i[0] = 0;
3218     nrows = 0;
3219     for (i=owners[proc]; i<owners[proc+1]; i++){
3220       anzi = ai[i+1] - ai[i];
3221       if (anzi) {
3222         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3223         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3224         nrows++;
3225       }
3226     }
3227     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3228     k++;
3229     buf_si += len_si[proc];
3230   }
3231 
3232   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3233   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3234 
3235   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3236   for (i=0; i<merge->nrecv; i++){
3237     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3238   }
3239 
3240   ierr = PetscFree(len_si);CHKERRQ(ierr);
3241   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3242   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3243   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3244   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3245   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3246   ierr = PetscFree(status);CHKERRQ(ierr);
3247 
3248   /* compute a local seq matrix in each processor */
3249   /*----------------------------------------------*/
3250   /* allocate bi array and free space for accumulating nonzero column info */
3251   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3252   bi[0] = 0;
3253 
3254   /* create and initialize a linked list */
3255   nlnk = N+1;
3256   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3257 
3258   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3259   len = 0;
3260   len  = ai[owners[rank+1]] - ai[owners[rank]];
3261   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3262   current_space = free_space;
3263 
3264   /* determine symbolic info for each local row */
3265   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3266   nextrow = buf_ri_k + merge->nrecv;
3267   nextai  = nextrow + merge->nrecv;
3268   for (k=0; k<merge->nrecv; k++){
3269     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3270     nrows = *buf_ri_k[k];
3271     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3272     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3273   }
3274 
3275   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3276   len = 0;
3277   for (i=0;i<m;i++) {
3278     bnzi   = 0;
3279     /* add local non-zero cols of this proc's seqmat into lnk */
3280     arow   = owners[rank] + i;
3281     anzi   = ai[arow+1] - ai[arow];
3282     aj     = a->j + ai[arow];
3283     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3284     bnzi += nlnk;
3285     /* add received col data into lnk */
3286     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3287       if (i == *nextrow[k]) { /* i-th row */
3288         anzi = *(nextai[k]+1) - *nextai[k];
3289         aj   = buf_rj[k] + *nextai[k];
3290         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3291         bnzi += nlnk;
3292         nextrow[k]++; nextai[k]++;
3293       }
3294     }
3295     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3296 
3297     /* if free space is not available, make more free space */
3298     if (current_space->local_remaining<bnzi) {
3299       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3300       nspacedouble++;
3301     }
3302     /* copy data into free space, then initialize lnk */
3303     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3304     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3305 
3306     current_space->array           += bnzi;
3307     current_space->local_used      += bnzi;
3308     current_space->local_remaining -= bnzi;
3309 
3310     bi[i+1] = bi[i] + bnzi;
3311   }
3312 
3313   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3314 
3315   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3316   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3317   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3318 
3319   /* create symbolic parallel matrix B_mpi */
3320   /*---------------------------------------*/
3321   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3322   if (n==PETSC_DECIDE) {
3323     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3324   } else {
3325     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3326   }
3327   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3328   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3329   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3330 
3331   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3332   B_mpi->assembled     = PETSC_FALSE;
3333   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3334   merge->bi            = bi;
3335   merge->bj            = bj;
3336   merge->buf_ri        = buf_ri;
3337   merge->buf_rj        = buf_rj;
3338   merge->coi           = PETSC_NULL;
3339   merge->coj           = PETSC_NULL;
3340   merge->owners_co     = PETSC_NULL;
3341 
3342   /* attach the supporting struct to B_mpi for reuse */
3343   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3344   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3345   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3346   *mpimat = B_mpi;
3347 
3348   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3349   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3350   PetscFunctionReturn(0);
3351 }
3352 
3353 static PetscEvent logkey_seqstompi = 0;
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatMerge_SeqsToMPI"
3356 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3357 {
3358   PetscErrorCode   ierr;
3359 
3360   PetscFunctionBegin;
3361   if (!logkey_seqstompi) {
3362     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3363   }
3364   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3365   if (scall == MAT_INITIAL_MATRIX){
3366     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3367   }
3368   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3369   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3370   PetscFunctionReturn(0);
3371 }
3372 static PetscEvent logkey_getlocalmat = 0;
3373 #undef __FUNCT__
3374 #define __FUNCT__ "MatGetLocalMat"
3375 /*@C
3376      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3377 
3378     Not Collective
3379 
3380    Input Parameters:
3381 +    A - the matrix
3382 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3383 
3384    Output Parameter:
3385 .    A_loc - the local sequential matrix generated
3386 
3387     Level: developer
3388 
3389 @*/
3390 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3391 {
3392   PetscErrorCode  ierr;
3393   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3394   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3395   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3396   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3397   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3398   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3399 
3400   PetscFunctionBegin;
3401   if (!logkey_getlocalmat) {
3402     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3403   }
3404   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3405   if (scall == MAT_INITIAL_MATRIX){
3406     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3407     ci[0] = 0;
3408     for (i=0; i<am; i++){
3409       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3410     }
3411     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3412     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3413     k = 0;
3414     for (i=0; i<am; i++) {
3415       ncols_o = bi[i+1] - bi[i];
3416       ncols_d = ai[i+1] - ai[i];
3417       /* off-diagonal portion of A */
3418       for (jo=0; jo<ncols_o; jo++) {
3419         col = cmap[*bj];
3420         if (col >= cstart) break;
3421         cj[k]   = col; bj++;
3422         ca[k++] = *ba++;
3423       }
3424       /* diagonal portion of A */
3425       for (j=0; j<ncols_d; j++) {
3426         cj[k]   = cstart + *aj++;
3427         ca[k++] = *aa++;
3428       }
3429       /* off-diagonal portion of A */
3430       for (j=jo; j<ncols_o; j++) {
3431         cj[k]   = cmap[*bj++];
3432         ca[k++] = *ba++;
3433       }
3434     }
3435     /* put together the new matrix */
3436     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3437     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3438     /* Since these are PETSc arrays, change flags to free them as necessary. */
3439     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3440     mat->freedata = PETSC_TRUE;
3441     mat->nonew    = 0;
3442   } else if (scall == MAT_REUSE_MATRIX){
3443     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3444     ci = mat->i; cj = mat->j; ca = mat->a;
3445     for (i=0; i<am; i++) {
3446       /* off-diagonal portion of A */
3447       ncols_o = bi[i+1] - bi[i];
3448       for (jo=0; jo<ncols_o; jo++) {
3449         col = cmap[*bj];
3450         if (col >= cstart) break;
3451         *ca++ = *ba++; bj++;
3452       }
3453       /* diagonal portion of A */
3454       ncols_d = ai[i+1] - ai[i];
3455       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3456       /* off-diagonal portion of A */
3457       for (j=jo; j<ncols_o; j++) {
3458         *ca++ = *ba++; bj++;
3459       }
3460     }
3461   } else {
3462     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3463   }
3464 
3465   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3466   PetscFunctionReturn(0);
3467 }
3468 
3469 static PetscEvent logkey_getlocalmatcondensed = 0;
3470 #undef __FUNCT__
3471 #define __FUNCT__ "MatGetLocalMatCondensed"
3472 /*@C
3473      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
3474 
3475     Not Collective
3476 
3477    Input Parameters:
3478 +    A - the matrix
3479 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3480 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3481 
3482    Output Parameter:
3483 .    A_loc - the local sequential matrix generated
3484 
3485     Level: developer
3486 
3487 @*/
3488 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3489 {
3490   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3491   PetscErrorCode    ierr;
3492   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3493   IS                isrowa,iscola;
3494   Mat               *aloc;
3495 
3496   PetscFunctionBegin;
3497   if (!logkey_getlocalmatcondensed) {
3498     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3499   }
3500   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3501   if (!row){
3502     start = A->rmap.rstart; end = A->rmap.rend;
3503     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3504   } else {
3505     isrowa = *row;
3506   }
3507   if (!col){
3508     start = A->cmap.rstart;
3509     cmap  = a->garray;
3510     nzA   = a->A->cmap.n;
3511     nzB   = a->B->cmap.n;
3512     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3513     ncols = 0;
3514     for (i=0; i<nzB; i++) {
3515       if (cmap[i] < start) idx[ncols++] = cmap[i];
3516       else break;
3517     }
3518     imark = i;
3519     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3520     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3521     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3522     ierr = PetscFree(idx);CHKERRQ(ierr);
3523   } else {
3524     iscola = *col;
3525   }
3526   if (scall != MAT_INITIAL_MATRIX){
3527     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3528     aloc[0] = *A_loc;
3529   }
3530   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3531   *A_loc = aloc[0];
3532   ierr = PetscFree(aloc);CHKERRQ(ierr);
3533   if (!row){
3534     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3535   }
3536   if (!col){
3537     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3538   }
3539   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3540   PetscFunctionReturn(0);
3541 }
3542 
3543 static PetscEvent logkey_GetBrowsOfAcols = 0;
3544 #undef __FUNCT__
3545 #define __FUNCT__ "MatGetBrowsOfAcols"
3546 /*@C
3547     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
3548 
3549     Collective on Mat
3550 
3551    Input Parameters:
3552 +    A,B - the matrices in mpiaij format
3553 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3554 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3555 
3556    Output Parameter:
3557 +    rowb, colb - index sets of rows and columns of B to extract
3558 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
3559 -    B_seq - the sequential matrix generated
3560 
3561     Level: developer
3562 
3563 @*/
3564 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3565 {
3566   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3567   PetscErrorCode    ierr;
3568   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3569   IS                isrowb,iscolb;
3570   Mat               *bseq;
3571 
3572   PetscFunctionBegin;
3573   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3574     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3575   }
3576   if (!logkey_GetBrowsOfAcols) {
3577     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3578   }
3579   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3580 
3581   if (scall == MAT_INITIAL_MATRIX){
3582     start = A->cmap.rstart;
3583     cmap  = a->garray;
3584     nzA   = a->A->cmap.n;
3585     nzB   = a->B->cmap.n;
3586     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3587     ncols = 0;
3588     for (i=0; i<nzB; i++) {  /* row < local row index */
3589       if (cmap[i] < start) idx[ncols++] = cmap[i];
3590       else break;
3591     }
3592     imark = i;
3593     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3594     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3595     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3596     ierr = PetscFree(idx);CHKERRQ(ierr);
3597     *brstart = imark;
3598     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
3599   } else {
3600     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3601     isrowb = *rowb; iscolb = *colb;
3602     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3603     bseq[0] = *B_seq;
3604   }
3605   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3606   *B_seq = bseq[0];
3607   ierr = PetscFree(bseq);CHKERRQ(ierr);
3608   if (!rowb){
3609     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3610   } else {
3611     *rowb = isrowb;
3612   }
3613   if (!colb){
3614     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3615   } else {
3616     *colb = iscolb;
3617   }
3618   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3619   PetscFunctionReturn(0);
3620 }
3621 
3622 static PetscEvent logkey_GetBrowsOfAocols = 0;
3623 #undef __FUNCT__
3624 #define __FUNCT__ "MatGetBrowsOfAoCols"
3625 /*@C
3626     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
3627     of the OFF-DIAGONAL portion of local A
3628 
3629     Collective on Mat
3630 
3631    Input Parameters:
3632 +    A,B - the matrices in mpiaij format
3633 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3634 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3635 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3636 
3637    Output Parameter:
3638 +    B_oth - the sequential matrix generated
3639 
3640     Level: developer
3641 
3642 @*/
3643 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3644 {
3645   VecScatter_MPI_General *gen_to,*gen_from;
3646   PetscErrorCode         ierr;
3647   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
3648   Mat_SeqAIJ             *b_oth;
3649   VecScatter             ctx=a->Mvctx;
3650   MPI_Comm               comm=ctx->comm;
3651   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3652   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
3653   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3654   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3655   MPI_Request            *rwaits,*swaits;
3656   MPI_Status             *sstatus,rstatus;
3657   PetscInt               *cols;
3658   PetscScalar            *vals;
3659   PetscMPIInt            j;
3660 
3661   PetscFunctionBegin;
3662   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3663     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3664   }
3665   if (!logkey_GetBrowsOfAocols) {
3666     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3667   }
3668   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3669   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3670 
3671   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3672   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3673   rvalues  = gen_from->values; /* holds the length of sending row */
3674   svalues  = gen_to->values;   /* holds the length of receiving row */
3675   nrecvs   = gen_from->n;
3676   nsends   = gen_to->n;
3677   rwaits   = gen_from->requests;
3678   swaits   = gen_to->requests;
3679   srow     = gen_to->indices;   /* local row index to be sent */
3680   rstarts  = gen_from->starts;
3681   sstarts  = gen_to->starts;
3682   rprocs   = gen_from->procs;
3683   sprocs   = gen_to->procs;
3684   sstatus  = gen_to->sstatus;
3685 
3686   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3687   if (scall == MAT_INITIAL_MATRIX){
3688     /* i-array */
3689     /*---------*/
3690     /*  post receives */
3691     for (i=0; i<nrecvs; i++){
3692       rowlen = (PetscInt*)rvalues + rstarts[i];
3693       nrows = rstarts[i+1]-rstarts[i];
3694       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3695     }
3696 
3697     /* pack the outgoing message */
3698     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3699     rstartsj = sstartsj + nsends +1;
3700     sstartsj[0] = 0;  rstartsj[0] = 0;
3701     len = 0; /* total length of j or a array to be sent */
3702     k = 0;
3703     for (i=0; i<nsends; i++){
3704       rowlen = (PetscInt*)svalues + sstarts[i];
3705       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3706       for (j=0; j<nrows; j++) {
3707         row = srow[k] + B->rmap.range[rank]; /* global row idx */
3708         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3709         len += rowlen[j];
3710         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3711         k++;
3712       }
3713       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3714        sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3715     }
3716     /* recvs and sends of i-array are completed */
3717     i = nrecvs;
3718     while (i--) {
3719       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3720     }
3721     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3722     /* allocate buffers for sending j and a arrays */
3723     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3724     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3725 
3726     /* create i-array of B_oth */
3727     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3728     b_othi[0] = 0;
3729     len = 0; /* total length of j or a array to be received */
3730     k = 0;
3731     for (i=0; i<nrecvs; i++){
3732       rowlen = (PetscInt*)rvalues + rstarts[i];
3733       nrows = rstarts[i+1]-rstarts[i];
3734       for (j=0; j<nrows; j++) {
3735         b_othi[k+1] = b_othi[k] + rowlen[j];
3736         len += rowlen[j]; k++;
3737       }
3738       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3739     }
3740 
3741     /* allocate space for j and a arrrays of B_oth */
3742     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3743     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3744 
3745     /* j-array */
3746     /*---------*/
3747     /*  post receives of j-array */
3748     for (i=0; i<nrecvs; i++){
3749       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3750       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3751     }
3752     k = 0;
3753     for (i=0; i<nsends; i++){
3754       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3755       bufJ = bufj+sstartsj[i];
3756       for (j=0; j<nrows; j++) {
3757         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3758         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3759         for (l=0; l<ncols; l++){
3760           *bufJ++ = cols[l];
3761         }
3762         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3763       }
3764       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3765     }
3766 
3767     /* recvs and sends of j-array are completed */
3768     i = nrecvs;
3769     while (i--) {
3770       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3771     }
3772     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3773   } else if (scall == MAT_REUSE_MATRIX){
3774     sstartsj = *startsj;
3775     rstartsj = sstartsj + nsends +1;
3776     bufa     = *bufa_ptr;
3777     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3778     b_otha   = b_oth->a;
3779   } else {
3780     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
3781   }
3782 
3783   /* a-array */
3784   /*---------*/
3785   /*  post receives of a-array */
3786   for (i=0; i<nrecvs; i++){
3787     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3788     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3789   }
3790   k = 0;
3791   for (i=0; i<nsends; i++){
3792     nrows = sstarts[i+1]-sstarts[i];
3793     bufA = bufa+sstartsj[i];
3794     for (j=0; j<nrows; j++) {
3795       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3796       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3797       for (l=0; l<ncols; l++){
3798         *bufA++ = vals[l];
3799       }
3800       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3801 
3802     }
3803     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3804   }
3805   /* recvs and sends of a-array are completed */
3806   i = nrecvs;
3807   while (i--) {
3808     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3809   }
3810    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3811 
3812   if (scall == MAT_INITIAL_MATRIX){
3813     /* put together the new matrix */
3814     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3815 
3816     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3817     /* Since these are PETSc arrays, change flags to free them as necessary. */
3818     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3819     b_oth->freedata = PETSC_TRUE;
3820     b_oth->nonew    = 0;
3821 
3822     ierr = PetscFree(bufj);CHKERRQ(ierr);
3823     if (!startsj || !bufa_ptr){
3824       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3825       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3826     } else {
3827       *startsj  = sstartsj;
3828       *bufa_ptr = bufa;
3829     }
3830   }
3831   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3832 
3833   PetscFunctionReturn(0);
3834 }
3835 
3836 #undef __FUNCT__
3837 #define __FUNCT__ "MatGetCommunicationStructs"
3838 /*@C
3839   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
3840 
3841   Not Collective
3842 
3843   Input Parameters:
3844 . A - The matrix in mpiaij format
3845 
3846   Output Parameter:
3847 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
3848 . colmap - A map from global column index to local index into lvec
3849 - multScatter - A scatter from the argument of a matrix-vector product to lvec
3850 
3851   Level: developer
3852 
3853 @*/
3854 #if defined (PETSC_USE_CTABLE)
3855 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
3856 #else
3857 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
3858 #endif
3859 {
3860   Mat_MPIAIJ *a;
3861 
3862   PetscFunctionBegin;
3863   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
3864   PetscValidPointer(lvec, 2)
3865   PetscValidPointer(colmap, 3)
3866   PetscValidPointer(multScatter, 4)
3867   a = (Mat_MPIAIJ *) A->data;
3868   if (lvec) *lvec = a->lvec;
3869   if (colmap) *colmap = a->colmap;
3870   if (multScatter) *multScatter = a->Mvctx;
3871   PetscFunctionReturn(0);
3872 }
3873 
3874 /*MC
3875    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3876 
3877    Options Database Keys:
3878 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3879 
3880   Level: beginner
3881 
3882 .seealso: MatCreateMPIAIJ
3883 M*/
3884 
3885 EXTERN_C_BEGIN
3886 #undef __FUNCT__
3887 #define __FUNCT__ "MatCreate_MPIAIJ"
3888 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3889 {
3890   Mat_MPIAIJ     *b;
3891   PetscErrorCode ierr;
3892   PetscMPIInt    size;
3893 
3894   PetscFunctionBegin;
3895   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3896 
3897   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3898   B->data         = (void*)b;
3899   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3900   B->factor       = 0;
3901   B->rmap.bs      = 1;
3902   B->assembled    = PETSC_FALSE;
3903   B->mapping      = 0;
3904 
3905   B->insertmode      = NOT_SET_VALUES;
3906   b->size            = size;
3907   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3908 
3909   /* build cache for off array entries formed */
3910   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3911   b->donotstash  = PETSC_FALSE;
3912   b->colmap      = 0;
3913   b->garray      = 0;
3914   b->roworiented = PETSC_TRUE;
3915 
3916   /* stuff used for matrix vector multiply */
3917   b->lvec      = PETSC_NULL;
3918   b->Mvctx     = PETSC_NULL;
3919 
3920   /* stuff for MatGetRow() */
3921   b->rowindices   = 0;
3922   b->rowvalues    = 0;
3923   b->getrowactive = PETSC_FALSE;
3924 
3925 
3926   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3927                                      "MatStoreValues_MPIAIJ",
3928                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3929   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3930                                      "MatRetrieveValues_MPIAIJ",
3931                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3932   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3933 				     "MatGetDiagonalBlock_MPIAIJ",
3934                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3935   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3936 				     "MatIsTranspose_MPIAIJ",
3937 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3938   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3939 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3940 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3941   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3942 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3943 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3944   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3945 				     "MatDiagonalScaleLocal_MPIAIJ",
3946 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3947   PetscFunctionReturn(0);
3948 }
3949 EXTERN_C_END
3950 
3951 /*
3952     Special version for direct calls from Fortran
3953 */
3954 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3955 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
3956 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3957 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
3958 #endif
3959 
/* Change these macros so they can be used inside a void function */
3961 #undef CHKERRQ
3962 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
3963 #undef SETERRQ2
3964 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
3965 #undef SETERRQ
3966 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
3967 
3968 EXTERN_C_BEGIN
3969 #undef __FUNCT__
3970 #define __FUNCT__ "matsetvaluesmpiaij_"
3971 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv)
3972 {
3973   Mat            mat = *mmat;
3974   PetscInt       m = *mm, n = *mn;
3975   InsertMode     addv = *maddv;
3976   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3977   PetscScalar    value;
3978   PetscErrorCode ierr;
3979 
3980   MatPreallocated(mat);
3981   if (mat->insertmode == NOT_SET_VALUES) {
3982     mat->insertmode = addv;
3983   }
3984 #if defined(PETSC_USE_DEBUG)
3985   else if (mat->insertmode != addv) {
3986     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3987   }
3988 #endif
3989   {
3990   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
3991   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
3992   PetscTruth     roworiented = aij->roworiented;
3993 
3994   /* Some Variables required in the macro */
3995   Mat            A = aij->A;
3996   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
3997   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
3998   PetscScalar    *aa = a->a;
3999   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4000   Mat            B = aij->B;
4001   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4002   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4003   PetscScalar    *ba = b->a;
4004 
4005   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4006   PetscInt       nonew = a->nonew;
4007   PetscScalar    *ap1,*ap2;
4008 
4009   PetscFunctionBegin;
4010   for (i=0; i<m; i++) {
4011     if (im[i] < 0) continue;
4012 #if defined(PETSC_USE_DEBUG)
4013     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4014 #endif
4015     if (im[i] >= rstart && im[i] < rend) {
4016       row      = im[i] - rstart;
4017       lastcol1 = -1;
4018       rp1      = aj + ai[row];
4019       ap1      = aa + ai[row];
4020       rmax1    = aimax[row];
4021       nrow1    = ailen[row];
4022       low1     = 0;
4023       high1    = nrow1;
4024       lastcol2 = -1;
4025       rp2      = bj + bi[row];
4026       ap2      = ba + bi[row];
4027       rmax2    = bimax[row];
4028       nrow2    = bilen[row];
4029       low2     = 0;
4030       high2    = nrow2;
4031 
4032       for (j=0; j<n; j++) {
4033         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4034         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4035         if (in[j] >= cstart && in[j] < cend){
4036           col = in[j] - cstart;
4037           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4038         } else if (in[j] < 0) continue;
4039 #if defined(PETSC_USE_DEBUG)
4040         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4041 #endif
4042         else {
4043           if (mat->was_assembled) {
4044             if (!aij->colmap) {
4045               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4046             }
4047 #if defined (PETSC_USE_CTABLE)
4048             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4049 	    col--;
4050 #else
4051             col = aij->colmap[in[j]] - 1;
4052 #endif
4053             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4054               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4055               col =  in[j];
4056               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4057               B = aij->B;
4058               b = (Mat_SeqAIJ*)B->data;
4059               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4060               rp2      = bj + bi[row];
4061               ap2      = ba + bi[row];
4062               rmax2    = bimax[row];
4063               nrow2    = bilen[row];
4064               low2     = 0;
4065               high2    = nrow2;
4066               bm       = aij->B->rmap.n;
4067               ba = b->a;
4068             }
4069           } else col = in[j];
4070           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4071         }
4072       }
4073     } else {
4074       if (!aij->donotstash) {
4075         if (roworiented) {
4076           if (ignorezeroentries && v[i*n] == 0.0) continue;
4077           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4078         } else {
4079           if (ignorezeroentries && v[i] == 0.0) continue;
4080           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4081         }
4082       }
4083     }
4084   }}
4085   PetscFunctionReturnVoid();
4086 }
4087 EXTERN_C_END
4088