xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 585f4aefcf86cd9c757a2118a079358f4211c693)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it it is not scalable (each processor
11 has an order N integer array but is fast to acess.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatGetValues_MPIAIJ"
241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
242 {
243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
244   PetscErrorCode ierr;
245   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
246   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
247 
248   PetscFunctionBegin;
249   for (i=0; i<m; i++) {
250     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
251     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
252     if (idxm[i] >= rstart && idxm[i] < rend) {
253       row = idxm[i] - rstart;
254       for (j=0; j<n; j++) {
255         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
256         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
257         if (idxn[j] >= cstart && idxn[j] < cend){
258           col = idxn[j] - cstart;
259           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
260         } else {
261           if (!aij->colmap) {
262             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
263           }
264 #if defined (PETSC_USE_CTABLE)
265           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
266           col --;
267 #else
268           col = aij->colmap[idxn[j]] - 1;
269 #endif
270           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
271           else {
272             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
273           }
274         }
275       }
276     } else {
277       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
278     }
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   if (aij->donotstash) {
294     PetscFunctionReturn(0);
295   }
296 
297   /* make sure all processors are either in INSERTMODE or ADDMODE */
298   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
299   if (addv == (ADD_VALUES|INSERT_VALUES)) {
300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
301   }
302   mat->insertmode = addv; /* in case this processor had no cache */
303 
304   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
305   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
306   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
313 {
314   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
315   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
316   PetscErrorCode ierr;
317   PetscMPIInt    n;
318   PetscInt       i,j,rstart,ncols,flg;
319   PetscInt       *row,*col,other_disassembled;
320   PetscScalar    *val;
321   InsertMode     addv = mat->insertmode;
322 
323   PetscFunctionBegin;
324   if (!aij->donotstash) {
325     while (1) {
326       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
327       if (!flg) break;
328 
329       for (i=0; i<n;) {
330         /* Now identify the consecutive vals belonging to the same row */
331         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332         if (j < n) ncols = j-i;
333         else       ncols = n-i;
334         /* Now assemble all these values with a single function call */
335         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
336         i = j;
337       }
338     }
339     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
340   }
341   a->compressedrow.use     = PETSC_FALSE;
342   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
344 
345   /* determine if any processor has disassembled, if so we must
346      also disassemble ourselfs, in order that we may reassemble. */
347   /*
348      if nonzero structure of submatrix B cannot change then we know that
349      no processor disassembled thus we can skip this stuff
350   */
351   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
352     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
353     if (mat->was_assembled && !other_disassembled) {
354       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
355     }
356   }
357   /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */
358   b    = (Mat_SeqAIJ *)aij->B->data;
359 
360   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
361     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
362   }
363   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
364   b->compressedrow.use = PETSC_TRUE;
365   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
366   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
367 
368   if (aij->rowvalues) {
369     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
370     aij->rowvalues = 0;
371   }
372 
373   /* used by MatAXPY() */
374   a->xtoy = 0; b->xtoy = 0;
375   a->XtoY = 0; b->XtoY = 0;
376 
377   PetscFunctionReturn(0);
378 }
379 
380 #undef __FUNCT__
381 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
382 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
383 {
384   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
385   PetscErrorCode ierr;
386 
387   PetscFunctionBegin;
388   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
389   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "MatZeroRows_MPIAIJ"
395 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
396 {
397   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
398   PetscErrorCode ierr;
399   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
400   PetscInt       i,*owners = A->rmap.range;
401   PetscInt       *nprocs,j,idx,nsends,row;
402   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
403   PetscInt       *rvalues,count,base,slen,*source;
404   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
405   MPI_Comm       comm = A->comm;
406   MPI_Request    *send_waits,*recv_waits;
407   MPI_Status     recv_status,*send_status;
408 #if defined(PETSC_DEBUG)
409   PetscTruth     found = PETSC_FALSE;
410 #endif
411 
412   PetscFunctionBegin;
413   /*  first count number of contributors to each processor */
414   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
415   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
416   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
417   j = 0;
418   for (i=0; i<N; i++) {
419     if (lastidx > (idx = rows[i])) j = 0;
420     lastidx = idx;
421     for (; j<size; j++) {
422       if (idx >= owners[j] && idx < owners[j+1]) {
423         nprocs[2*j]++;
424         nprocs[2*j+1] = 1;
425         owner[i] = j;
426 #if defined(PETSC_DEBUG)
427         found = PETSC_TRUE;
428 #endif
429         break;
430       }
431     }
432 #if defined(PETSC_DEBUG)
433     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
434     found = PETSC_FALSE;
435 #endif
436   }
437   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
438 
439   /* inform other processors of number of messages and max length*/
440   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
441 
442   /* post receives:   */
443   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
444   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
445   for (i=0; i<nrecvs; i++) {
446     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
447   }
448 
449   /* do sends:
450       1) starts[i] gives the starting index in svalues for stuff going to
451          the ith processor
452   */
453   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
454   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
455   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
456   starts[0] = 0;
457   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
458   for (i=0; i<N; i++) {
459     svalues[starts[owner[i]]++] = rows[i];
460   }
461 
462   starts[0] = 0;
463   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
464   count = 0;
465   for (i=0; i<size; i++) {
466     if (nprocs[2*i+1]) {
467       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
468     }
469   }
470   ierr = PetscFree(starts);CHKERRQ(ierr);
471 
472   base = owners[rank];
473 
474   /*  wait on receives */
475   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
476   source = lens + nrecvs;
477   count  = nrecvs; slen = 0;
478   while (count) {
479     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
480     /* unpack receives into our local space */
481     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
482     source[imdex]  = recv_status.MPI_SOURCE;
483     lens[imdex]    = n;
484     slen          += n;
485     count--;
486   }
487   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
488 
489   /* move the data into the send scatter */
490   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
491   count = 0;
492   for (i=0; i<nrecvs; i++) {
493     values = rvalues + i*nmax;
494     for (j=0; j<lens[i]; j++) {
495       lrows[count++] = values[j] - base;
496     }
497   }
498   ierr = PetscFree(rvalues);CHKERRQ(ierr);
499   ierr = PetscFree(lens);CHKERRQ(ierr);
500   ierr = PetscFree(owner);CHKERRQ(ierr);
501   ierr = PetscFree(nprocs);CHKERRQ(ierr);
502 
503   /* actually zap the local rows */
504   /*
505         Zero the required rows. If the "diagonal block" of the matrix
506      is square and the user wishes to set the diagonal we use separate
507      code so that MatSetValues() is not called for each diagonal allocating
508      new memory, thus calling lots of mallocs and slowing things down.
509 
510        Contributed by: Matthew Knepley
511   */
512   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
513   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
514   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
515     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
516   } else if (diag != 0.0) {
517     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
518     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
519       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
520 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
521     }
522     for (i = 0; i < slen; i++) {
523       row  = lrows[i] + rstart;
524       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
525     }
526     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
527     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
528   } else {
529     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
530   }
531   ierr = PetscFree(lrows);CHKERRQ(ierr);
532 
533   /* wait on sends */
534   if (nsends) {
535     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
536     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
537     ierr = PetscFree(send_status);CHKERRQ(ierr);
538   }
539   ierr = PetscFree(send_waits);CHKERRQ(ierr);
540   ierr = PetscFree(svalues);CHKERRQ(ierr);
541 
542   PetscFunctionReturn(0);
543 }
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatMult_MPIAIJ"
547 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
548 {
549   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
550   PetscErrorCode ierr;
551   PetscInt       nt;
552 
553   PetscFunctionBegin;
554   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
555   if (nt != A->cmap.n) {
556     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
557   }
558   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
559   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
560   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
561   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
562   PetscFunctionReturn(0);
563 }
564 
565 #undef __FUNCT__
566 #define __FUNCT__ "MatMultAdd_MPIAIJ"
567 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
568 {
569   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
570   PetscErrorCode ierr;
571 
572   PetscFunctionBegin;
573   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
574   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
575   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
576   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
582 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
583 {
584   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
585   PetscErrorCode ierr;
586   PetscTruth     merged;
587 
588   PetscFunctionBegin;
589   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
590   /* do nondiagonal part */
591   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
592   if (!merged) {
593     /* send it on its way */
594     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
595     /* do local part */
596     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
597     /* receive remote parts: note this assumes the values are not actually */
598     /* added in yy until the next line, */
599     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
600   } else {
601     /* do local part */
602     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
603     /* send it on its way */
604     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
605     /* values actually were received in the Begin() but we need to call this nop */
606     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
607   }
608   PetscFunctionReturn(0);
609 }
610 
611 EXTERN_C_BEGIN
612 #undef __FUNCT__
613 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
614 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
615 {
616   MPI_Comm       comm;
617   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
618   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
619   IS             Me,Notme;
620   PetscErrorCode ierr;
621   PetscInt       M,N,first,last,*notme,i;
622   PetscMPIInt    size;
623 
624   PetscFunctionBegin;
625 
626   /* Easy test: symmetric diagonal block */
627   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
628   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
629   if (!*f) PetscFunctionReturn(0);
630   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
631   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
632   if (size == 1) PetscFunctionReturn(0);
633 
634   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
635   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
636   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
637   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
638   for (i=0; i<first; i++) notme[i] = i;
639   for (i=last; i<M; i++) notme[i-last+first] = i;
640   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
641   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
642   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
643   Aoff = Aoffs[0];
644   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
645   Boff = Boffs[0];
646   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
647   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
648   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
649   ierr = ISDestroy(Me);CHKERRQ(ierr);
650   ierr = ISDestroy(Notme);CHKERRQ(ierr);
651 
652   PetscFunctionReturn(0);
653 }
654 EXTERN_C_END
655 
656 #undef __FUNCT__
657 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
658 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
659 {
660   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
661   PetscErrorCode ierr;
662 
663   PetscFunctionBegin;
664   /* do nondiagonal part */
665   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
666   /* send it on its way */
667   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
668   /* do local part */
669   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
670   /* receive remote parts */
671   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 
675 /*
676   This only works correctly for square matrices where the subblock A->A is the
677    diagonal block
678 */
679 #undef __FUNCT__
680 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
681 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
682 {
683   PetscErrorCode ierr;
684   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
685 
686   PetscFunctionBegin;
687   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
688   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
689     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
690   }
691   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatScale_MPIAIJ"
697 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
698 {
699   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
700   PetscErrorCode ierr;
701 
702   PetscFunctionBegin;
703   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
704   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatDestroy_MPIAIJ"
710 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
711 {
712   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
713   PetscErrorCode ierr;
714 
715   PetscFunctionBegin;
716 #if defined(PETSC_USE_LOG)
717   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
718 #endif
719   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
720   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
721   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
722 #if defined (PETSC_USE_CTABLE)
723   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
724 #else
725   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
726 #endif
727   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
728   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
729   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
730   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
731   ierr = PetscFree(aij);CHKERRQ(ierr);
732 
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
737   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
738   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
739   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatView_MPIAIJ_Binary"
745 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
746 {
747   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
748   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
749   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
750   PetscErrorCode    ierr;
751   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
752   int               fd;
753   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
754   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
755   PetscScalar       *column_values;
756 
757   PetscFunctionBegin;
758   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
759   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
760   nz   = A->nz + B->nz;
761   if (!rank) {
762     header[0] = MAT_FILE_COOKIE;
763     header[1] = mat->rmap.N;
764     header[2] = mat->cmap.N;
765     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
766     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
767     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
768     /* get largest number of rows any processor has */
769     rlen = mat->rmap.n;
770     range = mat->rmap.range;
771     for (i=1; i<size; i++) {
772       rlen = PetscMax(rlen,range[i+1] - range[i]);
773     }
774   } else {
775     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
776     rlen = mat->rmap.n;
777   }
778 
779   /* load up the local row counts */
780   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
781   for (i=0; i<mat->rmap.n; i++) {
782     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
783   }
784 
785   /* store the row lengths to the file */
786   if (!rank) {
787     MPI_Status status;
788     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
789     for (i=1; i<size; i++) {
790       rlen = range[i+1] - range[i];
791       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
792       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
793     }
794   } else {
795     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
796   }
797   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
798 
799   /* load up the local column indices */
800   nzmax = nz; /* )th processor needs space a largest processor needs */
801   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
802   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
803   cnt  = 0;
804   for (i=0; i<mat->rmap.n; i++) {
805     for (j=B->i[i]; j<B->i[i+1]; j++) {
806       if ( (col = garray[B->j[j]]) > cstart) break;
807       column_indices[cnt++] = col;
808     }
809     for (k=A->i[i]; k<A->i[i+1]; k++) {
810       column_indices[cnt++] = A->j[k] + cstart;
811     }
812     for (; j<B->i[i+1]; j++) {
813       column_indices[cnt++] = garray[B->j[j]];
814     }
815   }
816   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
817 
818   /* store the column indices to the file */
819   if (!rank) {
820     MPI_Status status;
821     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
822     for (i=1; i<size; i++) {
823       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
824       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
825       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
826       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
827     }
828   } else {
829     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
830     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
831   }
832   ierr = PetscFree(column_indices);CHKERRQ(ierr);
833 
834   /* load up the local column values */
835   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
836   cnt  = 0;
837   for (i=0; i<mat->rmap.n; i++) {
838     for (j=B->i[i]; j<B->i[i+1]; j++) {
839       if ( garray[B->j[j]] > cstart) break;
840       column_values[cnt++] = B->a[j];
841     }
842     for (k=A->i[i]; k<A->i[i+1]; k++) {
843       column_values[cnt++] = A->a[k];
844     }
845     for (; j<B->i[i+1]; j++) {
846       column_values[cnt++] = B->a[j];
847     }
848   }
849   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
850 
851   /* store the column values to the file */
852   if (!rank) {
853     MPI_Status status;
854     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
855     for (i=1; i<size; i++) {
856       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
857       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
858       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
859       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
860     }
861   } else {
862     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
863     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
864   }
865   ierr = PetscFree(column_values);CHKERRQ(ierr);
866   PetscFunctionReturn(0);
867 }
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
871 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
872 {
873   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
874   PetscErrorCode    ierr;
875   PetscMPIInt       rank = aij->rank,size = aij->size;
876   PetscTruth        isdraw,iascii,isbinary;
877   PetscViewer       sviewer;
878   PetscViewerFormat format;
879 
880   PetscFunctionBegin;
881   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
882   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
883   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
884   if (iascii) {
885     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
886     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
887       MatInfo    info;
888       PetscTruth inodes;
889 
890       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
891       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
892       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
893       if (!inodes) {
894         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
895 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
896       } else {
897         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
898 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
899       }
900       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
901       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
902       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
903       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
904       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
905       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
906       PetscFunctionReturn(0);
907     } else if (format == PETSC_VIEWER_ASCII_INFO) {
908       PetscInt   inodecount,inodelimit,*inodes;
909       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
910       if (inodes) {
911         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
912       } else {
913         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
914       }
915       PetscFunctionReturn(0);
916     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
917       PetscFunctionReturn(0);
918     }
919   } else if (isbinary) {
920     if (size == 1) {
921       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
922       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
923     } else {
924       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
925     }
926     PetscFunctionReturn(0);
927   } else if (isdraw) {
928     PetscDraw  draw;
929     PetscTruth isnull;
930     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
931     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
932   }
933 
934   if (size == 1) {
935     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
936     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
937   } else {
938     /* assemble the entire matrix onto first processor. */
939     Mat         A;
940     Mat_SeqAIJ  *Aloc;
941     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
942     PetscScalar *a;
943 
944     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
945     if (!rank) {
946       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
947     } else {
948       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
949     }
950     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
951     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
952     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
953     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
954 
955     /* copy over the A part */
956     Aloc = (Mat_SeqAIJ*)aij->A->data;
957     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
958     row = mat->rmap.rstart;
959     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
960     for (i=0; i<m; i++) {
961       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
962       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
963     }
964     aj = Aloc->j;
965     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
966 
967     /* copy over the B part */
968     Aloc = (Mat_SeqAIJ*)aij->B->data;
969     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
970     row  = mat->rmap.rstart;
971     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
972     ct   = cols;
973     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
974     for (i=0; i<m; i++) {
975       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
976       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
977     }
978     ierr = PetscFree(ct);CHKERRQ(ierr);
979     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
980     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
981     /*
982        Everyone has to call to draw the matrix since the graphics waits are
983        synchronized across all processors that share the PetscDraw object
984     */
985     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
986     if (!rank) {
987       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
988       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
989     }
990     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
991     ierr = MatDestroy(A);CHKERRQ(ierr);
992   }
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatView_MPIAIJ"
998 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
999 {
1000   PetscErrorCode ierr;
1001   PetscTruth     iascii,isdraw,issocket,isbinary;
1002 
1003   PetscFunctionBegin;
1004   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1005   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1006   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1007   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1008   if (iascii || isdraw || isbinary || issocket) {
1009     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1010   } else {
1011     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1012   }
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatRelax_MPIAIJ"
1020 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1021 {
1022   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1023   PetscErrorCode ierr;
1024   Vec            bb1;
1025 
1026   PetscFunctionBegin;
1027   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1028 
1029   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1030 
1031   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1032     if (flag & SOR_ZERO_INITIAL_GUESS) {
1033       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1034       its--;
1035     }
1036 
1037     while (its--) {
1038       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1039       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1040 
1041       /* update rhs: bb1 = bb - B*x */
1042       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1043       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1044 
1045       /* local sweep */
1046       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1047       CHKERRQ(ierr);
1048     }
1049   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1050     if (flag & SOR_ZERO_INITIAL_GUESS) {
1051       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1052       its--;
1053     }
1054     while (its--) {
1055       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1056       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1057 
1058       /* update rhs: bb1 = bb - B*x */
1059       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1060       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1061 
1062       /* local sweep */
1063       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1064       CHKERRQ(ierr);
1065     }
1066   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1067     if (flag & SOR_ZERO_INITIAL_GUESS) {
1068       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1069       its--;
1070     }
1071     while (its--) {
1072       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1073       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1074 
1075       /* update rhs: bb1 = bb - B*x */
1076       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1077       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1078 
1079       /* local sweep */
1080       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1081       CHKERRQ(ierr);
1082     }
1083   } else {
1084     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1085   }
1086 
1087   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatPermute_MPIAIJ"
1093 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1094 {
1095   MPI_Comm       comm,pcomm;
1096   PetscInt       first,local_size,nrows,*rows;
1097   int            ntids;
1098   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1103   /* make a collective version of 'rowp' */
1104   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1105   if (pcomm==comm) {
1106     crowp = rowp;
1107   } else {
1108     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1109     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1110     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1111     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1112   }
1113   /* collect the global row permutation and invert it */
1114   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1115   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1116   if (pcomm!=comm) {
1117     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1118   }
1119   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1120   /* get the local target indices */
1121   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1122   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1123   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1124   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1125   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1126   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1127   /* the column permutation is so much easier;
1128      make a local version of 'colp' and invert it */
1129   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1130   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1131   if (ntids==1) {
1132     lcolp = colp;
1133   } else {
1134     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1135     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1136     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1137   }
1138   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1139   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1140   if (ntids>1) {
1141     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1142     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1143   }
1144   /* now we just get the submatrix */
1145   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1146   /* clean up */
1147   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1148   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 #undef __FUNCT__
1153 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1154 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1155 {
1156   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1157   Mat            A = mat->A,B = mat->B;
1158   PetscErrorCode ierr;
1159   PetscReal      isend[5],irecv[5];
1160 
1161   PetscFunctionBegin;
1162   info->block_size     = 1.0;
1163   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1164   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1165   isend[3] = info->memory;  isend[4] = info->mallocs;
1166   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1167   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1168   isend[3] += info->memory;  isend[4] += info->mallocs;
1169   if (flag == MAT_LOCAL) {
1170     info->nz_used      = isend[0];
1171     info->nz_allocated = isend[1];
1172     info->nz_unneeded  = isend[2];
1173     info->memory       = isend[3];
1174     info->mallocs      = isend[4];
1175   } else if (flag == MAT_GLOBAL_MAX) {
1176     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1177     info->nz_used      = irecv[0];
1178     info->nz_allocated = irecv[1];
1179     info->nz_unneeded  = irecv[2];
1180     info->memory       = irecv[3];
1181     info->mallocs      = irecv[4];
1182   } else if (flag == MAT_GLOBAL_SUM) {
1183     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1184     info->nz_used      = irecv[0];
1185     info->nz_allocated = irecv[1];
1186     info->nz_unneeded  = irecv[2];
1187     info->memory       = irecv[3];
1188     info->mallocs      = irecv[4];
1189   }
1190   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1191   info->fill_ratio_needed = 0;
1192   info->factor_mallocs    = 0;
1193   info->rows_global       = (double)matin->rmap.N;
1194   info->columns_global    = (double)matin->cmap.N;
1195   info->rows_local        = (double)matin->rmap.n;
1196   info->columns_local     = (double)matin->cmap.N;
1197 
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatSetOption_MPIAIJ"
1203 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1204 {
1205   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1206   PetscErrorCode ierr;
1207 
1208   PetscFunctionBegin;
1209   switch (op) {
1210   case MAT_NO_NEW_NONZERO_LOCATIONS:
1211   case MAT_YES_NEW_NONZERO_LOCATIONS:
1212   case MAT_COLUMNS_UNSORTED:
1213   case MAT_COLUMNS_SORTED:
1214   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1215   case MAT_KEEP_ZEROED_ROWS:
1216   case MAT_NEW_NONZERO_LOCATION_ERR:
1217   case MAT_USE_INODES:
1218   case MAT_DO_NOT_USE_INODES:
1219   case MAT_IGNORE_ZERO_ENTRIES:
1220     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1221     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1222     break;
1223   case MAT_ROW_ORIENTED:
1224     a->roworiented = PETSC_TRUE;
1225     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1226     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1227     break;
1228   case MAT_ROWS_SORTED:
1229   case MAT_ROWS_UNSORTED:
1230   case MAT_YES_NEW_DIAGONALS:
1231     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1232     break;
1233   case MAT_COLUMN_ORIENTED:
1234     a->roworiented = PETSC_FALSE;
1235     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1236     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1237     break;
1238   case MAT_IGNORE_OFF_PROC_ENTRIES:
1239     a->donotstash = PETSC_TRUE;
1240     break;
1241   case MAT_NO_NEW_DIAGONALS:
1242     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1243   case MAT_SYMMETRIC:
1244   case MAT_STRUCTURALLY_SYMMETRIC:
1245   case MAT_HERMITIAN:
1246   case MAT_SYMMETRY_ETERNAL:
1247     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1248     break;
1249   case MAT_NOT_SYMMETRIC:
1250   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1251   case MAT_NOT_HERMITIAN:
1252   case MAT_NOT_SYMMETRY_ETERNAL:
1253     break;
1254   default:
1255     SETERRQ(PETSC_ERR_SUP,"unknown option");
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatGetRow_MPIAIJ"
1262 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1263 {
1264   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1265   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1266   PetscErrorCode ierr;
1267   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1268   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1269   PetscInt       *cmap,*idx_p;
1270 
1271   PetscFunctionBegin;
1272   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1273   mat->getrowactive = PETSC_TRUE;
1274 
1275   if (!mat->rowvalues && (idx || v)) {
1276     /*
1277         allocate enough space to hold information from the longest row.
1278     */
1279     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1280     PetscInt     max = 1,tmp;
1281     for (i=0; i<matin->rmap.n; i++) {
1282       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1283       if (max < tmp) { max = tmp; }
1284     }
1285     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1286     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1287   }
1288 
1289   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1290   lrow = row - rstart;
1291 
1292   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1293   if (!v)   {pvA = 0; pvB = 0;}
1294   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1295   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1296   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1297   nztot = nzA + nzB;
1298 
1299   cmap  = mat->garray;
1300   if (v  || idx) {
1301     if (nztot) {
1302       /* Sort by increasing column numbers, assuming A and B already sorted */
1303       PetscInt imark = -1;
1304       if (v) {
1305         *v = v_p = mat->rowvalues;
1306         for (i=0; i<nzB; i++) {
1307           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1308           else break;
1309         }
1310         imark = i;
1311         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1312         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1313       }
1314       if (idx) {
1315         *idx = idx_p = mat->rowindices;
1316         if (imark > -1) {
1317           for (i=0; i<imark; i++) {
1318             idx_p[i] = cmap[cworkB[i]];
1319           }
1320         } else {
1321           for (i=0; i<nzB; i++) {
1322             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1323             else break;
1324           }
1325           imark = i;
1326         }
1327         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1328         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1329       }
1330     } else {
1331       if (idx) *idx = 0;
1332       if (v)   *v   = 0;
1333     }
1334   }
1335   *nz = nztot;
1336   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1337   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1343 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1344 {
1345   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1346 
1347   PetscFunctionBegin;
1348   if (!aij->getrowactive) {
1349     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1350   }
1351   aij->getrowactive = PETSC_FALSE;
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatNorm_MPIAIJ"
1357 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1358 {
1359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1360   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1361   PetscErrorCode ierr;
1362   PetscInt       i,j,cstart = mat->cmap.rstart;
1363   PetscReal      sum = 0.0;
1364   PetscScalar    *v;
1365 
1366   PetscFunctionBegin;
1367   if (aij->size == 1) {
1368     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1369   } else {
1370     if (type == NORM_FROBENIUS) {
1371       v = amat->a;
1372       for (i=0; i<amat->nz; i++) {
1373 #if defined(PETSC_USE_COMPLEX)
1374         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1375 #else
1376         sum += (*v)*(*v); v++;
1377 #endif
1378       }
1379       v = bmat->a;
1380       for (i=0; i<bmat->nz; i++) {
1381 #if defined(PETSC_USE_COMPLEX)
1382         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1383 #else
1384         sum += (*v)*(*v); v++;
1385 #endif
1386       }
1387       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1388       *norm = sqrt(*norm);
1389     } else if (type == NORM_1) { /* max column norm */
1390       PetscReal *tmp,*tmp2;
1391       PetscInt    *jj,*garray = aij->garray;
1392       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1393       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1394       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1395       *norm = 0.0;
1396       v = amat->a; jj = amat->j;
1397       for (j=0; j<amat->nz; j++) {
1398         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1399       }
1400       v = bmat->a; jj = bmat->j;
1401       for (j=0; j<bmat->nz; j++) {
1402         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1403       }
1404       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1405       for (j=0; j<mat->cmap.N; j++) {
1406         if (tmp2[j] > *norm) *norm = tmp2[j];
1407       }
1408       ierr = PetscFree(tmp);CHKERRQ(ierr);
1409       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1410     } else if (type == NORM_INFINITY) { /* max row norm */
1411       PetscReal ntemp = 0.0;
1412       for (j=0; j<aij->A->rmap.n; j++) {
1413         v = amat->a + amat->i[j];
1414         sum = 0.0;
1415         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1416           sum += PetscAbsScalar(*v); v++;
1417         }
1418         v = bmat->a + bmat->i[j];
1419         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1420           sum += PetscAbsScalar(*v); v++;
1421         }
1422         if (sum > ntemp) ntemp = sum;
1423       }
1424       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1425     } else {
1426       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1427     }
1428   }
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatTranspose_MPIAIJ"
1434 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1435 {
1436   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1437   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1438   PetscErrorCode ierr;
1439   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1440   Mat            B;
1441   PetscScalar    *array;
1442 
1443   PetscFunctionBegin;
1444   if (!matout && M != N) {
1445     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1446   }
1447 
1448   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1449   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1450   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1451   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1452 
1453   /* copy over the A part */
1454   Aloc = (Mat_SeqAIJ*)a->A->data;
1455   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1456   row = A->rmap.rstart;
1457   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
1458   for (i=0; i<m; i++) {
1459     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1460     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1461   }
1462   aj = Aloc->j;
1463   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}
1464 
1465   /* copy over the B part */
1466   Aloc = (Mat_SeqAIJ*)a->B->data;
1467   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1468   row  = A->rmap.rstart;
1469   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1470   ct   = cols;
1471   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1472   for (i=0; i<m; i++) {
1473     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1474     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1475   }
1476   ierr = PetscFree(ct);CHKERRQ(ierr);
1477   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1478   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1479   if (matout) {
1480     *matout = B;
1481   } else {
1482     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1483   }
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1489 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1490 {
1491   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1492   Mat            a = aij->A,b = aij->B;
1493   PetscErrorCode ierr;
1494   PetscInt       s1,s2,s3;
1495 
1496   PetscFunctionBegin;
1497   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1498   if (rr) {
1499     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1500     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1501     /* Overlap communication with computation. */
1502     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1503   }
1504   if (ll) {
1505     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1506     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1507     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1508   }
1509   /* scale  the diagonal block */
1510   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1511 
1512   if (rr) {
1513     /* Do a scatter end and then right scale the off-diagonal block */
1514     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1515     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1516   }
1517 
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 
1522 #undef __FUNCT__
1523 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1524 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1525 {
1526   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1527   PetscErrorCode ierr;
1528 
1529   PetscFunctionBegin;
1530   if (!a->rank) {
1531     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1532   }
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1538 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1539 {
1540   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1545   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1546   PetscFunctionReturn(0);
1547 }
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1550 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1551 {
1552   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1553   PetscErrorCode ierr;
1554 
1555   PetscFunctionBegin;
1556   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatEqual_MPIAIJ"
1562 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1563 {
1564   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1565   Mat            a,b,c,d;
1566   PetscTruth     flg;
1567   PetscErrorCode ierr;
1568 
1569   PetscFunctionBegin;
1570   a = matA->A; b = matA->B;
1571   c = matB->A; d = matB->B;
1572 
1573   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1574   if (flg) {
1575     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1576   }
1577   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatCopy_MPIAIJ"
1583 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1584 {
1585   PetscErrorCode ierr;
1586   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1587   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1588 
1589   PetscFunctionBegin;
1590   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1591   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1592     /* because of the column compression in the off-processor part of the matrix a->B,
1593        the number of columns in a->B and b->B may be different, hence we cannot call
1594        the MatCopy() directly on the two parts. If need be, we can provide a more
1595        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1596        then copying the submatrices */
1597     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1598   } else {
1599     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1600     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1601   }
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 #undef __FUNCT__
1606 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1607 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1608 {
1609   PetscErrorCode ierr;
1610 
1611   PetscFunctionBegin;
1612   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 #include "petscblaslapack.h"
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatAXPY_MPIAIJ"
1619 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1620 {
1621   PetscErrorCode ierr;
1622   PetscInt       i;
1623   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1624   PetscBLASInt   bnz,one=1;
1625   Mat_SeqAIJ     *x,*y;
1626 
1627   PetscFunctionBegin;
1628   if (str == SAME_NONZERO_PATTERN) {
1629     PetscScalar alpha = a;
1630     x = (Mat_SeqAIJ *)xx->A->data;
1631     y = (Mat_SeqAIJ *)yy->A->data;
1632     bnz = (PetscBLASInt)x->nz;
1633     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1634     x = (Mat_SeqAIJ *)xx->B->data;
1635     y = (Mat_SeqAIJ *)yy->B->data;
1636     bnz = (PetscBLASInt)x->nz;
1637     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1638   } else if (str == SUBSET_NONZERO_PATTERN) {
1639     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1640 
1641     x = (Mat_SeqAIJ *)xx->B->data;
1642     y = (Mat_SeqAIJ *)yy->B->data;
1643     if (y->xtoy && y->XtoY != xx->B) {
1644       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1645       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1646     }
1647     if (!y->xtoy) { /* get xtoy */
1648       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1649       y->XtoY = xx->B;
1650     }
1651     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1652   } else {
1653     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1654   }
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1659 
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatConjugate_MPIAIJ"
1662 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1663 {
1664 #if defined(PETSC_USE_COMPLEX)
1665   PetscErrorCode ierr;
1666   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1667 
1668   PetscFunctionBegin;
1669   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1670   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1671 #else
1672   PetscFunctionBegin;
1673 #endif
1674   PetscFunctionReturn(0);
1675 }
1676 
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatRealPart_MPIAIJ"
1679 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1680 {
1681   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1682   PetscErrorCode ierr;
1683 
1684   PetscFunctionBegin;
1685   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1686   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1692 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1693 {
1694   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1695   PetscErrorCode ierr;
1696 
1697   PetscFunctionBegin;
1698   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1699   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 /* -------------------------------------------------------------------*/
1704 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1705        MatGetRow_MPIAIJ,
1706        MatRestoreRow_MPIAIJ,
1707        MatMult_MPIAIJ,
1708 /* 4*/ MatMultAdd_MPIAIJ,
1709        MatMultTranspose_MPIAIJ,
1710        MatMultTransposeAdd_MPIAIJ,
1711        0,
1712        0,
1713        0,
1714 /*10*/ 0,
1715        0,
1716        0,
1717        MatRelax_MPIAIJ,
1718        MatTranspose_MPIAIJ,
1719 /*15*/ MatGetInfo_MPIAIJ,
1720        MatEqual_MPIAIJ,
1721        MatGetDiagonal_MPIAIJ,
1722        MatDiagonalScale_MPIAIJ,
1723        MatNorm_MPIAIJ,
1724 /*20*/ MatAssemblyBegin_MPIAIJ,
1725        MatAssemblyEnd_MPIAIJ,
1726        0,
1727        MatSetOption_MPIAIJ,
1728        MatZeroEntries_MPIAIJ,
1729 /*25*/ MatZeroRows_MPIAIJ,
1730        0,
1731        0,
1732        0,
1733        0,
1734 /*30*/ MatSetUpPreallocation_MPIAIJ,
1735        0,
1736        0,
1737        0,
1738        0,
1739 /*35*/ MatDuplicate_MPIAIJ,
1740        0,
1741        0,
1742        0,
1743        0,
1744 /*40*/ MatAXPY_MPIAIJ,
1745        MatGetSubMatrices_MPIAIJ,
1746        MatIncreaseOverlap_MPIAIJ,
1747        MatGetValues_MPIAIJ,
1748        MatCopy_MPIAIJ,
1749 /*45*/ MatPrintHelp_MPIAIJ,
1750        MatScale_MPIAIJ,
1751        0,
1752        0,
1753        0,
1754 /*50*/ MatSetBlockSize_MPIAIJ,
1755        0,
1756        0,
1757        0,
1758        0,
1759 /*55*/ MatFDColoringCreate_MPIAIJ,
1760        0,
1761        MatSetUnfactored_MPIAIJ,
1762        MatPermute_MPIAIJ,
1763        0,
1764 /*60*/ MatGetSubMatrix_MPIAIJ,
1765        MatDestroy_MPIAIJ,
1766        MatView_MPIAIJ,
1767        0,
1768        0,
1769 /*65*/ 0,
1770        0,
1771        0,
1772        0,
1773        0,
1774 /*70*/ 0,
1775        0,
1776        MatSetColoring_MPIAIJ,
1777 #if defined(PETSC_HAVE_ADIC)
1778        MatSetValuesAdic_MPIAIJ,
1779 #else
1780        0,
1781 #endif
1782        MatSetValuesAdifor_MPIAIJ,
1783 /*75*/ 0,
1784        0,
1785        0,
1786        0,
1787        0,
1788 /*80*/ 0,
1789        0,
1790        0,
1791        0,
1792 /*84*/ MatLoad_MPIAIJ,
1793        0,
1794        0,
1795        0,
1796        0,
1797        0,
1798 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1799        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1800        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1801        MatPtAP_Basic,
1802        MatPtAPSymbolic_MPIAIJ,
1803 /*95*/ MatPtAPNumeric_MPIAIJ,
1804        0,
1805        0,
1806        0,
1807        0,
1808 /*100*/0,
1809        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1810        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1811        MatConjugate_MPIAIJ,
1812        0,
1813 /*105*/MatSetValuesRow_MPIAIJ,
1814        MatRealPart_MPIAIJ,
1815        MatImaginaryPart_MPIAIJ};
1816 
1817 /* ----------------------------------------------------------------------------------------*/
1818 
1819 EXTERN_C_BEGIN
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1822 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1823 {
1824   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1825   PetscErrorCode ierr;
1826 
1827   PetscFunctionBegin;
1828   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1829   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1830   PetscFunctionReturn(0);
1831 }
1832 EXTERN_C_END
1833 
1834 EXTERN_C_BEGIN
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1837 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1838 {
1839   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1840   PetscErrorCode ierr;
1841 
1842   PetscFunctionBegin;
1843   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1844   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1845   PetscFunctionReturn(0);
1846 }
1847 EXTERN_C_END
1848 
1849 #include "petscpc.h"
1850 EXTERN_C_BEGIN
1851 #undef __FUNCT__
1852 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1853 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1854 {
1855   Mat_MPIAIJ     *b;
1856   PetscErrorCode ierr;
1857   PetscInt       i;
1858 
1859   PetscFunctionBegin;
1860   B->preallocated = PETSC_TRUE;
1861   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1862   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1863   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1864   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1865 
1866   B->rmap.bs = B->cmap.bs = 1;
1867   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1868   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1869   if (d_nnz) {
1870     for (i=0; i<B->rmap.n; i++) {
1871       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1872     }
1873   }
1874   if (o_nnz) {
1875     for (i=0; i<B->rmap.n; i++) {
1876       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1877     }
1878   }
1879   b = (Mat_MPIAIJ*)B->data;
1880 
1881   /* Explicitly create 2 MATSEQAIJ matrices. */
1882   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1883   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1884   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1885   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1886   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1887   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1888   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1889   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1890 
1891   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1892   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1893 
1894   PetscFunctionReturn(0);
1895 }
1896 EXTERN_C_END
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1900 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1901 {
1902   Mat            mat;
1903   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1904   PetscErrorCode ierr;
1905 
1906   PetscFunctionBegin;
1907   *newmat       = 0;
1908   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1909   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
1910   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1911   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1912   a    = (Mat_MPIAIJ*)mat->data;
1913 
1914   mat->factor       = matin->factor;
1915   mat->rmap.bs      = matin->rmap.bs;
1916   mat->assembled    = PETSC_TRUE;
1917   mat->insertmode   = NOT_SET_VALUES;
1918   mat->preallocated = PETSC_TRUE;
1919 
1920   a->size           = oldmat->size;
1921   a->rank           = oldmat->rank;
1922   a->donotstash     = oldmat->donotstash;
1923   a->roworiented    = oldmat->roworiented;
1924   a->rowindices     = 0;
1925   a->rowvalues      = 0;
1926   a->getrowactive   = PETSC_FALSE;
1927 
1928   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
1929   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
1930 
1931   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1932   if (oldmat->colmap) {
1933 #if defined (PETSC_USE_CTABLE)
1934     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1935 #else
1936     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1937     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1938     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1939 #endif
1940   } else a->colmap = 0;
1941   if (oldmat->garray) {
1942     PetscInt len;
1943     len  = oldmat->B->cmap.n;
1944     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1945     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1946     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1947   } else a->garray = 0;
1948 
1949   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1950   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1951   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1952   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1953   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1954   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1955   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1956   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1957   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1958   *newmat = mat;
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #include "petscsys.h"
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatLoad_MPIAIJ"
1966 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1967 {
1968   Mat            A;
1969   PetscScalar    *vals,*svals;
1970   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1971   MPI_Status     status;
1972   PetscErrorCode ierr;
1973   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1974   PetscInt       i,nz,j,rstart,rend,mmax;
1975   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1976   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1977   PetscInt       cend,cstart,n,*rowners;
1978   int            fd;
1979 
1980   PetscFunctionBegin;
1981   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1982   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1983   if (!rank) {
1984     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1985     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1986     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1987   }
1988 
1989   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1990   M = header[1]; N = header[2];
1991   /* determine ownership of all rows */
1992   m    = M/size + ((M % size) > rank);
1993   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1994   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1995 
1996   /* First process needs enough room for process with most rows */
1997   if (!rank) {
1998     mmax       = rowners[1];
1999     for (i=2; i<size; i++) {
2000       mmax = PetscMax(mmax,rowners[i]);
2001     }
2002   } else mmax = m;
2003 
2004   rowners[0] = 0;
2005   for (i=2; i<=size; i++) {
2006     mmax       = PetscMax(mmax,rowners[i]);
2007     rowners[i] += rowners[i-1];
2008   }
2009   rstart = rowners[rank];
2010   rend   = rowners[rank+1];
2011 
2012   /* distribute row lengths to all processors */
2013   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2014   if (!rank) {
2015     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2016     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2017     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2018     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2019     for (j=0; j<m; j++) {
2020       procsnz[0] += ourlens[j];
2021     }
2022     for (i=1; i<size; i++) {
2023       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2024       /* calculate the number of nonzeros on each processor */
2025       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2026         procsnz[i] += rowlengths[j];
2027       }
2028       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2029     }
2030     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2031   } else {
2032     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2033   }
2034 
2035   if (!rank) {
2036     /* determine max buffer needed and allocate it */
2037     maxnz = 0;
2038     for (i=0; i<size; i++) {
2039       maxnz = PetscMax(maxnz,procsnz[i]);
2040     }
2041     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2042 
2043     /* read in my part of the matrix column indices  */
2044     nz   = procsnz[0];
2045     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2046     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2047 
2048     /* read in every one elses and ship off */
2049     for (i=1; i<size; i++) {
2050       nz   = procsnz[i];
2051       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2052       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2053     }
2054     ierr = PetscFree(cols);CHKERRQ(ierr);
2055   } else {
2056     /* determine buffer space needed for message */
2057     nz = 0;
2058     for (i=0; i<m; i++) {
2059       nz += ourlens[i];
2060     }
2061     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2062 
2063     /* receive message of column indices*/
2064     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2065     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2066     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2067   }
2068 
2069   /* determine column ownership if matrix is not square */
2070   if (N != M) {
2071     n      = N/size + ((N % size) > rank);
2072     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2073     cstart = cend - n;
2074   } else {
2075     cstart = rstart;
2076     cend   = rend;
2077     n      = cend - cstart;
2078   }
2079 
2080   /* loop over local rows, determining number of off diagonal entries */
2081   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2082   jj = 0;
2083   for (i=0; i<m; i++) {
2084     for (j=0; j<ourlens[i]; j++) {
2085       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2086       jj++;
2087     }
2088   }
2089 
2090   /* create our matrix */
2091   for (i=0; i<m; i++) {
2092     ourlens[i] -= offlens[i];
2093   }
2094   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2095   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2096   ierr = MatSetType(A,type);CHKERRQ(ierr);
2097   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2098 
2099   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2100   for (i=0; i<m; i++) {
2101     ourlens[i] += offlens[i];
2102   }
2103 
2104   if (!rank) {
2105     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2106 
2107     /* read in my part of the matrix numerical values  */
2108     nz   = procsnz[0];
2109     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2110 
2111     /* insert into matrix */
2112     jj      = rstart;
2113     smycols = mycols;
2114     svals   = vals;
2115     for (i=0; i<m; i++) {
2116       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2117       smycols += ourlens[i];
2118       svals   += ourlens[i];
2119       jj++;
2120     }
2121 
2122     /* read in other processors and ship out */
2123     for (i=1; i<size; i++) {
2124       nz   = procsnz[i];
2125       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2126       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2127     }
2128     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2129   } else {
2130     /* receive numeric values */
2131     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2132 
2133     /* receive message of values*/
2134     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2135     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2136     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2137 
2138     /* insert into matrix */
2139     jj      = rstart;
2140     smycols = mycols;
2141     svals   = vals;
2142     for (i=0; i<m; i++) {
2143       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2144       smycols += ourlens[i];
2145       svals   += ourlens[i];
2146       jj++;
2147     }
2148   }
2149   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2150   ierr = PetscFree(vals);CHKERRQ(ierr);
2151   ierr = PetscFree(mycols);CHKERRQ(ierr);
2152   ierr = PetscFree(rowners);CHKERRQ(ierr);
2153 
2154   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2155   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2156   *newmat = A;
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2162 /*
2163     Not great since it makes two copies of the submatrix, first an SeqAIJ
2164   in local and then by concatenating the local matrices the end result.
2165   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2166 */
2167 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2168 {
2169   PetscErrorCode ierr;
2170   PetscMPIInt    rank,size;
2171   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2172   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2173   Mat            *local,M,Mreuse;
2174   PetscScalar    *vwork,*aa;
2175   MPI_Comm       comm = mat->comm;
2176   Mat_SeqAIJ     *aij;
2177 
2178 
2179   PetscFunctionBegin;
2180   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2181   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2182 
2183   if (call ==  MAT_REUSE_MATRIX) {
2184     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2185     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2186     local = &Mreuse;
2187     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2188   } else {
2189     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2190     Mreuse = *local;
2191     ierr   = PetscFree(local);CHKERRQ(ierr);
2192   }
2193 
2194   /*
2195       m - number of local rows
2196       n - number of columns (same on all processors)
2197       rstart - first row in new global matrix generated
2198   */
2199   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2200   if (call == MAT_INITIAL_MATRIX) {
2201     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2202     ii  = aij->i;
2203     jj  = aij->j;
2204 
2205     /*
2206         Determine the number of non-zeros in the diagonal and off-diagonal
2207         portions of the matrix in order to do correct preallocation
2208     */
2209 
2210     /* first get start and end of "diagonal" columns */
2211     if (csize == PETSC_DECIDE) {
2212       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2213       if (mglobal == n) { /* square matrix */
2214 	nlocal = m;
2215       } else {
2216         nlocal = n/size + ((n % size) > rank);
2217       }
2218     } else {
2219       nlocal = csize;
2220     }
2221     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2222     rstart = rend - nlocal;
2223     if (rank == size - 1 && rend != n) {
2224       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2225     }
2226 
2227     /* next, compute all the lengths */
2228     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2229     olens = dlens + m;
2230     for (i=0; i<m; i++) {
2231       jend = ii[i+1] - ii[i];
2232       olen = 0;
2233       dlen = 0;
2234       for (j=0; j<jend; j++) {
2235         if (*jj < rstart || *jj >= rend) olen++;
2236         else dlen++;
2237         jj++;
2238       }
2239       olens[i] = olen;
2240       dlens[i] = dlen;
2241     }
2242     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2243     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2244     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2245     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2246     ierr = PetscFree(dlens);CHKERRQ(ierr);
2247   } else {
2248     PetscInt ml,nl;
2249 
2250     M = *newmat;
2251     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2252     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2253     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2254     /*
2255          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2256        rather than the slower MatSetValues().
2257     */
2258     M->was_assembled = PETSC_TRUE;
2259     M->assembled     = PETSC_FALSE;
2260   }
2261   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2262   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2263   ii  = aij->i;
2264   jj  = aij->j;
2265   aa  = aij->a;
2266   for (i=0; i<m; i++) {
2267     row   = rstart + i;
2268     nz    = ii[i+1] - ii[i];
2269     cwork = jj;     jj += nz;
2270     vwork = aa;     aa += nz;
2271     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2272   }
2273 
2274   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2275   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2276   *newmat = M;
2277 
2278   /* save submatrix used in processor for next request */
2279   if (call ==  MAT_INITIAL_MATRIX) {
2280     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2281     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2282   }
2283 
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 EXTERN_C_BEGIN
2288 #undef __FUNCT__
2289 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2290 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2291 {
2292   PetscInt       m,cstart, cend,j,nnz,i,d;
2293   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2294   const PetscInt *JJ;
2295   PetscScalar    *values;
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]);
2300 
2301   B->rmap.bs = B->cmap.bs = 1;
2302   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2303   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2304   m      = B->rmap.n;
2305   cstart = B->cmap.rstart;
2306   cend   = B->cmap.rend;
2307   rstart = B->rmap.rstart;
2308 
2309   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2310   o_nnz = d_nnz + m;
2311 
2312   for (i=0; i<m; i++) {
2313     nnz     = I[i+1]- I[i];
2314     JJ      = J + I[i];
2315     nnz_max = PetscMax(nnz_max,nnz);
2316     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2317     for (j=0; j<nnz; j++) {
2318       if (*JJ >= cstart) break;
2319       JJ++;
2320     }
2321     d = 0;
2322     for (; j<nnz; j++) {
2323       if (*JJ++ >= cend) break;
2324       d++;
2325     }
2326     d_nnz[i] = d;
2327     o_nnz[i] = nnz - d;
2328   }
2329   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2330   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2331 
2332   if (v) values = (PetscScalar*)v;
2333   else {
2334     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2335     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2336   }
2337 
2338   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2339   for (i=0; i<m; i++) {
2340     ii   = i + rstart;
2341     nnz  = I[i+1]- I[i];
2342     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2343   }
2344   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2345   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2346   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2347 
2348   if (!v) {
2349     ierr = PetscFree(values);CHKERRQ(ierr);
2350   }
2351   PetscFunctionReturn(0);
2352 }
2353 EXTERN_C_END
2354 
2355 #undef __FUNCT__
2356 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2357 /*@
2358    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2359    (the default parallel PETSc format).
2360 
2361    Collective on MPI_Comm
2362 
2363    Input Parameters:
2364 +  B - the matrix
2365 .  i - the indices into j for the start of each local row (starts with zero)
2366 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2367 -  v - optional values in the matrix
2368 
2369    Level: developer
2370 
2371 .keywords: matrix, aij, compressed row, sparse, parallel
2372 
2373 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2374 @*/
2375 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2376 {
2377   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2378 
2379   PetscFunctionBegin;
2380   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2381   if (f) {
2382     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2383   }
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2389 /*@C
2390    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2391    (the default parallel PETSc format).  For good matrix assembly performance
2392    the user should preallocate the matrix storage by setting the parameters
2393    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2394    performance can be increased by more than a factor of 50.
2395 
2396    Collective on MPI_Comm
2397 
2398    Input Parameters:
2399 +  A - the matrix
2400 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2401            (same value is used for all local rows)
2402 .  d_nnz - array containing the number of nonzeros in the various rows of the
2403            DIAGONAL portion of the local submatrix (possibly different for each row)
2404            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2405            The size of this array is equal to the number of local rows, i.e 'm'.
2406            You must leave room for the diagonal entry even if it is zero.
2407 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2408            submatrix (same value is used for all local rows).
2409 -  o_nnz - array containing the number of nonzeros in the various rows of the
2410            OFF-DIAGONAL portion of the local submatrix (possibly different for
2411            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2412            structure. The size of this array is equal to the number
2413            of local rows, i.e 'm'.
2414 
2415    If the *_nnz parameter is given then the *_nz parameter is ignored
2416 
2417    The AIJ format (also called the Yale sparse matrix format or
2418    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2419    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2420 
2421    The parallel matrix is partitioned such that the first m0 rows belong to
2422    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2423    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2424 
2425    The DIAGONAL portion of the local submatrix of a processor can be defined
2426    as the submatrix which is obtained by extraction the part corresponding
2427    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2428    first row that belongs to the processor, and r2 is the last row belonging
2429    to the this processor. This is a square mxm matrix. The remaining portion
2430    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2431 
2432    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2433 
2434    Example usage:
2435 
2436    Consider the following 8x8 matrix with 34 non-zero values, that is
2437    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2438    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2439    as follows:
2440 
2441 .vb
2442             1  2  0  |  0  3  0  |  0  4
2443     Proc0   0  5  6  |  7  0  0  |  8  0
2444             9  0 10  | 11  0  0  | 12  0
2445     -------------------------------------
2446            13  0 14  | 15 16 17  |  0  0
2447     Proc1   0 18  0  | 19 20 21  |  0  0
2448             0  0  0  | 22 23  0  | 24  0
2449     -------------------------------------
2450     Proc2  25 26 27  |  0  0 28  | 29  0
2451            30  0  0  | 31 32 33  |  0 34
2452 .ve
2453 
2454    This can be represented as a collection of submatrices as:
2455 
2456 .vb
2457       A B C
2458       D E F
2459       G H I
2460 .ve
2461 
2462    Where the submatrices A,B,C are owned by proc0, D,E,F are
2463    owned by proc1, G,H,I are owned by proc2.
2464 
2465    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2466    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2467    The 'M','N' parameters are 8,8, and have the same values on all procs.
2468 
2469    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2470    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2471    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2472    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2473    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2474    matrix, ans [DF] as another SeqAIJ matrix.
2475 
2476    When d_nz, o_nz parameters are specified, d_nz storage elements are
2477    allocated for every row of the local diagonal submatrix, and o_nz
2478    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2479    One way to choose d_nz and o_nz is to use the max nonzerors per local
2480    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2481    In this case, the values of d_nz,o_nz are:
2482 .vb
2483      proc0 : dnz = 2, o_nz = 2
2484      proc1 : dnz = 3, o_nz = 2
2485      proc2 : dnz = 1, o_nz = 4
2486 .ve
2487    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2488    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2489    for proc3. i.e we are using 12+15+10=37 storage locations to store
2490    34 values.
2491 
2492    When d_nnz, o_nnz parameters are specified, the storage is specified
2493    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2494    In the above case the values for d_nnz,o_nnz are:
2495 .vb
2496      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2497      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2498      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2499 .ve
2500    Here the space allocated is sum of all the above values i.e 34, and
2501    hence pre-allocation is perfect.
2502 
2503    Level: intermediate
2504 
2505 .keywords: matrix, aij, compressed row, sparse, parallel
2506 
2507 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2508           MPIAIJ
2509 @*/
2510 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2511 {
2512   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2513 
2514   PetscFunctionBegin;
2515   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2516   if (f) {
2517     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2518   }
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatCreateMPIAIJ"
2524 /*@C
2525    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2526    (the default parallel PETSc format).  For good matrix assembly performance
2527    the user should preallocate the matrix storage by setting the parameters
2528    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2529    performance can be increased by more than a factor of 50.
2530 
2531    Collective on MPI_Comm
2532 
2533    Input Parameters:
2534 +  comm - MPI communicator
2535 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2536            This value should be the same as the local size used in creating the
2537            y vector for the matrix-vector product y = Ax.
2538 .  n - This value should be the same as the local size used in creating the
2539        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2540        calculated if N is given) For square matrices n is almost always m.
2541 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2542 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2543 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2544            (same value is used for all local rows)
2545 .  d_nnz - array containing the number of nonzeros in the various rows of the
2546            DIAGONAL portion of the local submatrix (possibly different for each row)
2547            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2548            The size of this array is equal to the number of local rows, i.e 'm'.
2549            You must leave room for the diagonal entry even if it is zero.
2550 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2551            submatrix (same value is used for all local rows).
2552 -  o_nnz - array containing the number of nonzeros in the various rows of the
2553            OFF-DIAGONAL portion of the local submatrix (possibly different for
2554            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2555            structure. The size of this array is equal to the number
2556            of local rows, i.e 'm'.
2557 
2558    Output Parameter:
2559 .  A - the matrix
2560 
2561    Notes:
2562    If the *_nnz parameter is given then the *_nz parameter is ignored
2563 
2564    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2565    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2566    storage requirements for this matrix.
2567 
2568    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2569    processor than it must be used on all processors that share the object for
2570    that argument.
2571 
2572    The user MUST specify either the local or global matrix dimensions
2573    (possibly both).
2574 
2575    The parallel matrix is partitioned such that the first m0 rows belong to
2576    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2577    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2578 
2579    The DIAGONAL portion of the local submatrix of a processor can be defined
2580    as the submatrix which is obtained by extraction the part corresponding
2581    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2582    first row that belongs to the processor, and r2 is the last row belonging
2583    to the this processor. This is a square mxm matrix. The remaining portion
2584    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2585 
2586    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2587 
2588    When calling this routine with a single process communicator, a matrix of
2589    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2590    type of communicator, use the construction mechanism:
2591      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2592 
2593    By default, this format uses inodes (identical nodes) when possible.
2594    We search for consecutive rows with the same nonzero structure, thereby
2595    reusing matrix information to achieve increased efficiency.
2596 
2597    Options Database Keys:
2598 +  -mat_no_inode  - Do not use inodes
2599 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2600 -  -mat_aij_oneindex - Internally use indexing starting at 1
2601         rather than 0.  Note that when calling MatSetValues(),
2602         the user still MUST index entries starting at 0!
2603 
2604 
2605    Example usage:
2606 
2607    Consider the following 8x8 matrix with 34 non-zero values, that is
2608    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2609    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2610    as follows:
2611 
2612 .vb
2613             1  2  0  |  0  3  0  |  0  4
2614     Proc0   0  5  6  |  7  0  0  |  8  0
2615             9  0 10  | 11  0  0  | 12  0
2616     -------------------------------------
2617            13  0 14  | 15 16 17  |  0  0
2618     Proc1   0 18  0  | 19 20 21  |  0  0
2619             0  0  0  | 22 23  0  | 24  0
2620     -------------------------------------
2621     Proc2  25 26 27  |  0  0 28  | 29  0
2622            30  0  0  | 31 32 33  |  0 34
2623 .ve
2624 
2625    This can be represented as a collection of submatrices as:
2626 
2627 .vb
2628       A B C
2629       D E F
2630       G H I
2631 .ve
2632 
2633    Where the submatrices A,B,C are owned by proc0, D,E,F are
2634    owned by proc1, G,H,I are owned by proc2.
2635 
2636    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2637    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2638    The 'M','N' parameters are 8,8, and have the same values on all procs.
2639 
2640    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2641    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2642    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2643    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2644    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2645    matrix, ans [DF] as another SeqAIJ matrix.
2646 
2647    When d_nz, o_nz parameters are specified, d_nz storage elements are
2648    allocated for every row of the local diagonal submatrix, and o_nz
2649    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2650    One way to choose d_nz and o_nz is to use the max nonzerors per local
2651    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2652    In this case, the values of d_nz,o_nz are:
2653 .vb
2654      proc0 : dnz = 2, o_nz = 2
2655      proc1 : dnz = 3, o_nz = 2
2656      proc2 : dnz = 1, o_nz = 4
2657 .ve
2658    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2659    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2660    for proc3. i.e we are using 12+15+10=37 storage locations to store
2661    34 values.
2662 
2663    When d_nnz, o_nnz parameters are specified, the storage is specified
2664    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2665    In the above case the values for d_nnz,o_nnz are:
2666 .vb
2667      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2668      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2669      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2670 .ve
2671    Here the space allocated is sum of all the above values i.e 34, and
2672    hence pre-allocation is perfect.
2673 
2674    Level: intermediate
2675 
2676 .keywords: matrix, aij, compressed row, sparse, parallel
2677 
2678 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2679           MPIAIJ
2680 @*/
2681 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2682 {
2683   PetscErrorCode ierr;
2684   PetscMPIInt    size;
2685 
2686   PetscFunctionBegin;
2687   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2688   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2689   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2690   if (size > 1) {
2691     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2692     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2693   } else {
2694     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2695     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2696   }
2697   PetscFunctionReturn(0);
2698 }
2699 
2700 #undef __FUNCT__
2701 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2702 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2703 {
2704   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2705 
2706   PetscFunctionBegin;
2707   *Ad     = a->A;
2708   *Ao     = a->B;
2709   *colmap = a->garray;
2710   PetscFunctionReturn(0);
2711 }
2712 
2713 #undef __FUNCT__
2714 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2715 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2716 {
2717   PetscErrorCode ierr;
2718   PetscInt       i;
2719   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2720 
2721   PetscFunctionBegin;
2722   if (coloring->ctype == IS_COLORING_LOCAL) {
2723     ISColoringValue *allcolors,*colors;
2724     ISColoring      ocoloring;
2725 
2726     /* set coloring for diagonal portion */
2727     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2728 
2729     /* set coloring for off-diagonal portion */
2730     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2731     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2732     for (i=0; i<a->B->cmap.n; i++) {
2733       colors[i] = allcolors[a->garray[i]];
2734     }
2735     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2736     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
2737     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2738     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2739   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2740     ISColoringValue *colors;
2741     PetscInt        *larray;
2742     ISColoring      ocoloring;
2743 
2744     /* set coloring for diagonal portion */
2745     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2746     for (i=0; i<a->A->cmap.n; i++) {
2747       larray[i] = i + A->cmap.rstart;
2748     }
2749     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2750     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2751     for (i=0; i<a->A->cmap.n; i++) {
2752       colors[i] = coloring->colors[larray[i]];
2753     }
2754     ierr = PetscFree(larray);CHKERRQ(ierr);
2755     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
2756     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2757     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2758 
2759     /* set coloring for off-diagonal portion */
2760     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2761     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2762     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2763     for (i=0; i<a->B->cmap.n; i++) {
2764       colors[i] = coloring->colors[larray[i]];
2765     }
2766     ierr = PetscFree(larray);CHKERRQ(ierr);
2767     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
2768     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2769     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2770   } else {
2771     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2772   }
2773 
2774   PetscFunctionReturn(0);
2775 }
2776 
2777 #if defined(PETSC_HAVE_ADIC)
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2780 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2781 {
2782   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2783   PetscErrorCode ierr;
2784 
2785   PetscFunctionBegin;
2786   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2787   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2788   PetscFunctionReturn(0);
2789 }
2790 #endif
2791 
2792 #undef __FUNCT__
2793 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2794 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2795 {
2796   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2797   PetscErrorCode ierr;
2798 
2799   PetscFunctionBegin;
2800   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2801   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2802   PetscFunctionReturn(0);
2803 }
2804 
2805 #undef __FUNCT__
2806 #define __FUNCT__ "MatMerge"
2807 /*@C
2808       MatMerge - Creates a single large PETSc matrix by concatinating sequential
2809                  matrices from each processor
2810 
2811     Collective on MPI_Comm
2812 
2813    Input Parameters:
2814 +    comm - the communicators the parallel matrix will live on
2815 .    inmat - the input sequential matrices
2816 .    n - number of local columns (or PETSC_DECIDE)
2817 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2818 
2819    Output Parameter:
2820 .    outmat - the parallel matrix generated
2821 
2822     Level: advanced
2823 
2824    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2825 
2826 @*/
2827 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2828 {
2829   PetscErrorCode ierr;
2830   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2831   PetscInt       *indx;
2832   PetscScalar    *values;
2833 
2834   PetscFunctionBegin;
2835   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2836   if (scall == MAT_INITIAL_MATRIX){
2837     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2838     if (n == PETSC_DECIDE){
2839       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
2840     }
2841     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2842     rstart -= m;
2843 
2844     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2845     for (i=0;i<m;i++) {
2846       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2847       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2848       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2849     }
2850     /* This routine will ONLY return MPIAIJ type matrix */
2851     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2852     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2853     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2854     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2855     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2856 
2857   } else if (scall == MAT_REUSE_MATRIX){
2858     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2859   } else {
2860     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2861   }
2862 
2863   for (i=0;i<m;i++) {
2864     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2865     I    = i + rstart;
2866     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2867     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2868   }
2869   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2870   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2871   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2872 
2873   PetscFunctionReturn(0);
2874 }
2875 
2876 #undef __FUNCT__
2877 #define __FUNCT__ "MatFileSplit"
2878 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2879 {
2880   PetscErrorCode    ierr;
2881   PetscMPIInt       rank;
2882   PetscInt          m,N,i,rstart,nnz;
2883   size_t            len;
2884   const PetscInt    *indx;
2885   PetscViewer       out;
2886   char              *name;
2887   Mat               B;
2888   const PetscScalar *values;
2889 
2890   PetscFunctionBegin;
2891   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2892   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2893   /* Should this be the type of the diagonal block of A? */
2894   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2895   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2896   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2897   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2898   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2899   for (i=0;i<m;i++) {
2900     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2901     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2902     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2903   }
2904   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2905   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2906 
2907   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2908   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2909   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2910   sprintf(name,"%s.%d",outfile,rank);
2911   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
2912   ierr = PetscFree(name);
2913   ierr = MatView(B,out);CHKERRQ(ierr);
2914   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2915   ierr = MatDestroy(B);CHKERRQ(ierr);
2916   PetscFunctionReturn(0);
2917 }
2918 
2919 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2920 #undef __FUNCT__
2921 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2922 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2923 {
2924   PetscErrorCode       ierr;
2925   Mat_Merge_SeqsToMPI  *merge;
2926   PetscObjectContainer container;
2927 
2928   PetscFunctionBegin;
2929   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2930   if (container) {
2931     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2932     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2933     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2934     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2935     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2936     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2937     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2938     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2939     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2940     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2941     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2942 
2943     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2944     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2945   }
2946   ierr = PetscFree(merge);CHKERRQ(ierr);
2947 
2948   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2949   PetscFunctionReturn(0);
2950 }
2951 
2952 #include "src/mat/utils/freespace.h"
2953 #include "petscbt.h"
2954 static PetscEvent logkey_seqstompinum = 0;
2955 #undef __FUNCT__
2956 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2957 /*@C
2958       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
2959                  matrices from each processor
2960 
2961     Collective on MPI_Comm
2962 
2963    Input Parameters:
2964 +    comm - the communicators the parallel matrix will live on
2965 .    seqmat - the input sequential matrices
2966 .    m - number of local rows (or PETSC_DECIDE)
2967 .    n - number of local columns (or PETSC_DECIDE)
2968 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2969 
2970    Output Parameter:
2971 .    mpimat - the parallel matrix generated
2972 
2973     Level: advanced
2974 
2975    Notes:
2976      The dimensions of the sequential matrix in each processor MUST be the same.
2977      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
2978      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2979 @*/
2980 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2981 {
2982   PetscErrorCode       ierr;
2983   MPI_Comm             comm=mpimat->comm;
2984   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2985   PetscMPIInt          size,rank,taga,*len_s;
2986   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
2987   PetscInt             proc,m;
2988   PetscInt             **buf_ri,**buf_rj;
2989   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2990   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2991   MPI_Request          *s_waits,*r_waits;
2992   MPI_Status           *status;
2993   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2994   Mat_Merge_SeqsToMPI  *merge;
2995   PetscObjectContainer container;
2996 
2997   PetscFunctionBegin;
2998   if (!logkey_seqstompinum) {
2999     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
3000   }
3001   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3002 
3003   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3004   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3005 
3006   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3007   if (container) {
3008     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3009   }
3010   bi     = merge->bi;
3011   bj     = merge->bj;
3012   buf_ri = merge->buf_ri;
3013   buf_rj = merge->buf_rj;
3014 
3015   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3016   owners = merge->rowmap.range;
3017   len_s  = merge->len_s;
3018 
3019   /* send and recv matrix values */
3020   /*-----------------------------*/
3021   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3022   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3023 
3024   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3025   for (proc=0,k=0; proc<size; proc++){
3026     if (!len_s[proc]) continue;
3027     i = owners[proc];
3028     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3029     k++;
3030   }
3031 
3032   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3033   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3034   ierr = PetscFree(status);CHKERRQ(ierr);
3035 
3036   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3037   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3038 
3039   /* insert mat values of mpimat */
3040   /*----------------------------*/
3041   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3042   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3043   nextrow = buf_ri_k + merge->nrecv;
3044   nextai  = nextrow + merge->nrecv;
3045 
3046   for (k=0; k<merge->nrecv; k++){
3047     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3048     nrows = *(buf_ri_k[k]);
3049     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3050     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3051   }
3052 
3053   /* set values of ba */
3054   m = merge->rowmap.n;
3055   for (i=0; i<m; i++) {
3056     arow = owners[rank] + i;
3057     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3058     bnzi = bi[i+1] - bi[i];
3059     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3060 
3061     /* add local non-zero vals of this proc's seqmat into ba */
3062     anzi = ai[arow+1] - ai[arow];
3063     aj   = a->j + ai[arow];
3064     aa   = a->a + ai[arow];
3065     nextaj = 0;
3066     for (j=0; nextaj<anzi; j++){
3067       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3068         ba_i[j] += aa[nextaj++];
3069       }
3070     }
3071 
3072     /* add received vals into ba */
3073     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3074       /* i-th row */
3075       if (i == *nextrow[k]) {
3076         anzi = *(nextai[k]+1) - *nextai[k];
3077         aj   = buf_rj[k] + *(nextai[k]);
3078         aa   = abuf_r[k] + *(nextai[k]);
3079         nextaj = 0;
3080         for (j=0; nextaj<anzi; j++){
3081           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3082             ba_i[j] += aa[nextaj++];
3083           }
3084         }
3085         nextrow[k]++; nextai[k]++;
3086       }
3087     }
3088     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3089   }
3090   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3091   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3092 
3093   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3094   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3095   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3096   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3097   PetscFunctionReturn(0);
3098 }
3099 
3100 static PetscEvent logkey_seqstompisym = 0;
3101 #undef __FUNCT__
3102 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3103 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3104 {
3105   PetscErrorCode       ierr;
3106   Mat                  B_mpi;
3107   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3108   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3109   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3110   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3111   PetscInt             len,proc,*dnz,*onz;
3112   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3113   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3114   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3115   MPI_Status           *status;
3116   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3117   PetscBT              lnkbt;
3118   Mat_Merge_SeqsToMPI  *merge;
3119   PetscObjectContainer container;
3120 
3121   PetscFunctionBegin;
3122   if (!logkey_seqstompisym) {
3123     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3124   }
3125   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3126 
3127   /* make sure it is a PETSc comm */
3128   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3129   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3130   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3131 
3132   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3133   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3134 
3135   /* determine row ownership */
3136   /*---------------------------------------------------------*/
3137   merge->rowmap.n = m;
3138   merge->rowmap.N = M;
3139   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3140   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3141   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3142 
3143   m      = merge->rowmap.n;
3144   M      = merge->rowmap.N;
3145   owners = merge->rowmap.range;
3146 
3147   /* determine the number of messages to send, their lengths */
3148   /*---------------------------------------------------------*/
3149   len_s  = merge->len_s;
3150 
3151   len = 0;  /* length of buf_si[] */
3152   merge->nsend = 0;
3153   for (proc=0; proc<size; proc++){
3154     len_si[proc] = 0;
3155     if (proc == rank){
3156       len_s[proc] = 0;
3157     } else {
3158       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3159       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3160     }
3161     if (len_s[proc]) {
3162       merge->nsend++;
3163       nrows = 0;
3164       for (i=owners[proc]; i<owners[proc+1]; i++){
3165         if (ai[i+1] > ai[i]) nrows++;
3166       }
3167       len_si[proc] = 2*(nrows+1);
3168       len += len_si[proc];
3169     }
3170   }
3171 
3172   /* determine the number and length of messages to receive for ij-structure */
3173   /*-------------------------------------------------------------------------*/
3174   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3175   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3176 
3177   /* post the Irecv of j-structure */
3178   /*-------------------------------*/
3179   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&tagj);CHKERRQ(ierr);
3180   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3181 
3182   /* post the Isend of j-structure */
3183   /*--------------------------------*/
3184   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3185   sj_waits = si_waits + merge->nsend;
3186 
3187   for (proc=0, k=0; proc<size; proc++){
3188     if (!len_s[proc]) continue;
3189     i = owners[proc];
3190     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3191     k++;
3192   }
3193 
3194   /* receives and sends of j-structure are complete */
3195   /*------------------------------------------------*/
3196   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3197   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3198 
3199   /* send and recv i-structure */
3200   /*---------------------------*/
3201   ierr = PetscObjectGetNewTag((PetscObject)merge,&tagi);CHKERRQ(ierr);
3202   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3203 
3204   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3205   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3206   for (proc=0,k=0; proc<size; proc++){
3207     if (!len_s[proc]) continue;
3208     /* form outgoing message for i-structure:
3209          buf_si[0]:                 nrows to be sent
3210                [1:nrows]:           row index (global)
3211                [nrows+1:2*nrows+1]: i-structure index
3212     */
3213     /*-------------------------------------------*/
3214     nrows = len_si[proc]/2 - 1;
3215     buf_si_i    = buf_si + nrows+1;
3216     buf_si[0]   = nrows;
3217     buf_si_i[0] = 0;
3218     nrows = 0;
3219     for (i=owners[proc]; i<owners[proc+1]; i++){
3220       anzi = ai[i+1] - ai[i];
3221       if (anzi) {
3222         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3223         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3224         nrows++;
3225       }
3226     }
3227     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3228     k++;
3229     buf_si += len_si[proc];
3230   }
3231 
3232   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3233   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3234 
3235   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3236   for (i=0; i<merge->nrecv; i++){
3237     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3238   }
3239 
3240   ierr = PetscFree(len_si);CHKERRQ(ierr);
3241   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3242   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3243   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3244   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3245   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3246   ierr = PetscFree(status);CHKERRQ(ierr);
3247 
3248   /* compute a local seq matrix in each processor */
3249   /*----------------------------------------------*/
3250   /* allocate bi array and free space for accumulating nonzero column info */
3251   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3252   bi[0] = 0;
3253 
3254   /* create and initialize a linked list */
3255   nlnk = N+1;
3256   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3257 
3258   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3259   len = 0;
3260   len  = ai[owners[rank+1]] - ai[owners[rank]];
3261   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3262   current_space = free_space;
3263 
3264   /* determine symbolic info for each local row */
3265   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3266   nextrow = buf_ri_k + merge->nrecv;
3267   nextai  = nextrow + merge->nrecv;
3268   for (k=0; k<merge->nrecv; k++){
3269     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3270     nrows = *buf_ri_k[k];
3271     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3272     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3273   }
3274 
3275   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3276   len = 0;
3277   for (i=0;i<m;i++) {
3278     bnzi   = 0;
3279     /* add local non-zero cols of this proc's seqmat into lnk */
3280     arow   = owners[rank] + i;
3281     anzi   = ai[arow+1] - ai[arow];
3282     aj     = a->j + ai[arow];
3283     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3284     bnzi += nlnk;
3285     /* add received col data into lnk */
3286     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3287       if (i == *nextrow[k]) { /* i-th row */
3288         anzi = *(nextai[k]+1) - *nextai[k];
3289         aj   = buf_rj[k] + *nextai[k];
3290         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3291         bnzi += nlnk;
3292         nextrow[k]++; nextai[k]++;
3293       }
3294     }
3295     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3296 
3297     /* if free space is not available, make more free space */
3298     if (current_space->local_remaining<bnzi) {
3299       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3300       nspacedouble++;
3301     }
3302     /* copy data into free space, then initialize lnk */
3303     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3304     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3305 
3306     current_space->array           += bnzi;
3307     current_space->local_used      += bnzi;
3308     current_space->local_remaining -= bnzi;
3309 
3310     bi[i+1] = bi[i] + bnzi;
3311   }
3312 
3313   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3314 
3315   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3316   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3317   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3318 
3319   /* create symbolic parallel matrix B_mpi */
3320   /*---------------------------------------*/
3321   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3322   if (n==PETSC_DECIDE) {
3323     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3324   } else {
3325     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3326   }
3327   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3328   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3329   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3330 
3331   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3332   B_mpi->assembled     = PETSC_FALSE;
3333   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3334   merge->bi            = bi;
3335   merge->bj            = bj;
3336   merge->buf_ri        = buf_ri;
3337   merge->buf_rj        = buf_rj;
3338   merge->coi           = PETSC_NULL;
3339   merge->coj           = PETSC_NULL;
3340   merge->owners_co     = PETSC_NULL;
3341 
3342   /* attach the supporting struct to B_mpi for reuse */
3343   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3344   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3345   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3346   *mpimat = B_mpi;
3347 
3348   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3349   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3350   PetscFunctionReturn(0);
3351 }
3352 
3353 static PetscEvent logkey_seqstompi = 0;
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatMerge_SeqsToMPI"
3356 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3357 {
3358   PetscErrorCode   ierr;
3359 
3360   PetscFunctionBegin;
3361   if (!logkey_seqstompi) {
3362     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3363   }
3364   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3365   if (scall == MAT_INITIAL_MATRIX){
3366     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3367   }
3368   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3369   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3370   PetscFunctionReturn(0);
3371 }
3372 static PetscEvent logkey_getlocalmat = 0;
3373 #undef __FUNCT__
3374 #define __FUNCT__ "MatGetLocalMat"
3375 /*@C
3376      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3377 
3378     Not Collective
3379 
3380    Input Parameters:
3381 +    A - the matrix
3382 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3383 
3384    Output Parameter:
3385 .    A_loc - the local sequential matrix generated
3386 
3387     Level: developer
3388 
3389 @*/
3390 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3391 {
3392   PetscErrorCode  ierr;
3393   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3394   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3395   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3396   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3397   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3398   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3399 
3400   PetscFunctionBegin;
3401   if (!logkey_getlocalmat) {
3402     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3403   }
3404   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3405   if (scall == MAT_INITIAL_MATRIX){
3406     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3407     ci[0] = 0;
3408     for (i=0; i<am; i++){
3409       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3410     }
3411     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3412     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3413     k = 0;
3414     for (i=0; i<am; i++) {
3415       ncols_o = bi[i+1] - bi[i];
3416       ncols_d = ai[i+1] - ai[i];
3417       /* off-diagonal portion of A */
3418       for (jo=0; jo<ncols_o; jo++) {
3419         col = cmap[*bj];
3420         if (col >= cstart) break;
3421         cj[k]   = col; bj++;
3422         ca[k++] = *ba++;
3423       }
3424       /* diagonal portion of A */
3425       for (j=0; j<ncols_d; j++) {
3426         cj[k]   = cstart + *aj++;
3427         ca[k++] = *aa++;
3428       }
3429       /* off-diagonal portion of A */
3430       for (j=jo; j<ncols_o; j++) {
3431         cj[k]   = cmap[*bj++];
3432         ca[k++] = *ba++;
3433       }
3434     }
3435     /* put together the new matrix */
3436     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3437     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3438     /* Since these are PETSc arrays, change flags to free them as necessary. */
3439     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3440     mat->freedata = PETSC_TRUE;
3441     mat->nonew    = 0;
3442   } else if (scall == MAT_REUSE_MATRIX){
3443     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3444     ci = mat->i; cj = mat->j; ca = mat->a;
3445     for (i=0; i<am; i++) {
3446       /* off-diagonal portion of A */
3447       ncols_o = bi[i+1] - bi[i];
3448       for (jo=0; jo<ncols_o; jo++) {
3449         col = cmap[*bj];
3450         if (col >= cstart) break;
3451         *ca++ = *ba++; bj++;
3452       }
3453       /* diagonal portion of A */
3454       ncols_d = ai[i+1] - ai[i];
3455       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3456       /* off-diagonal portion of A */
3457       for (j=jo; j<ncols_o; j++) {
3458         *ca++ = *ba++; bj++;
3459       }
3460     }
3461   } else {
3462     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3463   }
3464 
3465   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3466   PetscFunctionReturn(0);
3467 }
3468 
3469 static PetscEvent logkey_getlocalmatcondensed = 0;
3470 #undef __FUNCT__
3471 #define __FUNCT__ "MatGetLocalMatCondensed"
3472 /*@C
3473      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
3474 
3475     Not Collective
3476 
3477    Input Parameters:
3478 +    A - the matrix
3479 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3480 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3481 
3482    Output Parameter:
3483 .    A_loc - the local sequential matrix generated
3484 
3485     Level: developer
3486 
3487 @*/
3488 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3489 {
3490   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3491   PetscErrorCode    ierr;
3492   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3493   IS                isrowa,iscola;
3494   Mat               *aloc;
3495 
3496   PetscFunctionBegin;
3497   if (!logkey_getlocalmatcondensed) {
3498     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3499   }
3500   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3501   if (!row){
3502     start = A->rmap.rstart; end = A->rmap.rend;
3503     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3504   } else {
3505     isrowa = *row;
3506   }
3507   if (!col){
3508     start = A->cmap.rstart;
3509     cmap  = a->garray;
3510     nzA   = a->A->cmap.n;
3511     nzB   = a->B->cmap.n;
3512     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3513     ncols = 0;
3514     for (i=0; i<nzB; i++) {
3515       if (cmap[i] < start) idx[ncols++] = cmap[i];
3516       else break;
3517     }
3518     imark = i;
3519     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3520     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3521     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3522     ierr = PetscFree(idx);CHKERRQ(ierr);
3523   } else {
3524     iscola = *col;
3525   }
3526   if (scall != MAT_INITIAL_MATRIX){
3527     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3528     aloc[0] = *A_loc;
3529   }
3530   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3531   *A_loc = aloc[0];
3532   ierr = PetscFree(aloc);CHKERRQ(ierr);
3533   if (!row){
3534     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3535   }
3536   if (!col){
3537     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3538   }
3539   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3540   PetscFunctionReturn(0);
3541 }
3542 
3543 static PetscEvent logkey_GetBrowsOfAcols = 0;
3544 #undef __FUNCT__
3545 #define __FUNCT__ "MatGetBrowsOfAcols"
3546 /*@C
3547     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
3548 
3549     Collective on Mat
3550 
3551    Input Parameters:
3552 +    A,B - the matrices in mpiaij format
3553 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3554 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3555 
3556    Output Parameter:
3557 +    rowb, colb - index sets of rows and columns of B to extract
3558 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
3559 -    B_seq - the sequential matrix generated
3560 
3561     Level: developer
3562 
3563 @*/
3564 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3565 {
3566   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3567   PetscErrorCode    ierr;
3568   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3569   IS                isrowb,iscolb;
3570   Mat               *bseq;
3571 
3572   PetscFunctionBegin;
3573   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3574     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3575   }
3576   if (!logkey_GetBrowsOfAcols) {
3577     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3578   }
3579   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3580 
3581   if (scall == MAT_INITIAL_MATRIX){
3582     start = A->cmap.rstart;
3583     cmap  = a->garray;
3584     nzA   = a->A->cmap.n;
3585     nzB   = a->B->cmap.n;
3586     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3587     ncols = 0;
3588     for (i=0; i<nzB; i++) {  /* row < local row index */
3589       if (cmap[i] < start) idx[ncols++] = cmap[i];
3590       else break;
3591     }
3592     imark = i;
3593     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3594     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3595     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3596     ierr = PetscFree(idx);CHKERRQ(ierr);
3597     *brstart = imark;
3598     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
3599   } else {
3600     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3601     isrowb = *rowb; iscolb = *colb;
3602     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3603     bseq[0] = *B_seq;
3604   }
3605   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3606   *B_seq = bseq[0];
3607   ierr = PetscFree(bseq);CHKERRQ(ierr);
3608   if (!rowb){
3609     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3610   } else {
3611     *rowb = isrowb;
3612   }
3613   if (!colb){
3614     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3615   } else {
3616     *colb = iscolb;
3617   }
3618   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3619   PetscFunctionReturn(0);
3620 }
3621 
3622 static PetscEvent logkey_GetBrowsOfAocols = 0;
3623 #undef __FUNCT__
3624 #define __FUNCT__ "MatGetBrowsOfAoCols"
3625 /*@C
3626     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
3627     of the OFF-DIAGONAL portion of local A
3628 
3629     Collective on Mat
3630 
3631    Input Parameters:
3632 +    A,B - the matrices in mpiaij format
3633 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3634 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3635 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3636 
3637    Output Parameter:
3638 +    B_oth - the sequential matrix generated
3639 
3640     Level: developer
3641 
3642 @*/
3643 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3644 {
3645   VecScatter_MPI_General *gen_to,*gen_from;
3646   PetscErrorCode         ierr;
3647   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
3648   Mat_SeqAIJ             *b_oth;
3649   VecScatter             ctx=a->Mvctx;
3650   MPI_Comm               comm=ctx->comm;
3651   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3652   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
3653   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3654   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3655   MPI_Request            *rwaits,*swaits;
3656   MPI_Status             *sstatus,rstatus;
3657   PetscInt               *cols;
3658   PetscScalar            *vals;
3659   PetscMPIInt            j;
3660 
3661   PetscFunctionBegin;
3662   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3663     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3664   }
3665   if (!logkey_GetBrowsOfAocols) {
3666     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3667   }
3668   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3669   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3670 
3671   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3672   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3673   rvalues  = gen_from->values; /* holds the length of sending row */
3674   svalues  = gen_to->values;   /* holds the length of receiving row */
3675   nrecvs   = gen_from->n;
3676   nsends   = gen_to->n;
3677   rwaits   = gen_from->requests;
3678   swaits   = gen_to->requests;
3679   srow     = gen_to->indices;   /* local row index to be sent */
3680   rstarts  = gen_from->starts;
3681   sstarts  = gen_to->starts;
3682   rprocs   = gen_from->procs;
3683   sprocs   = gen_to->procs;
3684   sstatus  = gen_to->sstatus;
3685 
3686   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3687   if (scall == MAT_INITIAL_MATRIX){
3688     /* i-array */
3689     /*---------*/
3690     /*  post receives */
3691     for (i=0; i<nrecvs; i++){
3692       rowlen = (PetscInt*)rvalues + rstarts[i];
3693       nrows = rstarts[i+1]-rstarts[i];
3694       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3695     }
3696 
3697     /* pack the outgoing message */
3698     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3699     rstartsj = sstartsj + nsends +1;
3700     sstartsj[0] = 0;  rstartsj[0] = 0;
3701     len = 0; /* total length of j or a array to be sent */
3702     k = 0;
3703     for (i=0; i<nsends; i++){
3704       rowlen = (PetscInt*)svalues + sstarts[i];
3705       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3706       for (j=0; j<nrows; j++) {
3707         row = srow[k] + B->rmap.range[rank]; /* global row idx */
3708         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3709         len += rowlen[j];
3710         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3711         k++;
3712       }
3713       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3714        sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3715     }
3716     /* recvs and sends of i-array are completed */
3717     i = nrecvs;
3718     while (i--) {
3719       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3720     }
3721     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3722     /* allocate buffers for sending j and a arrays */
3723     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3724     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3725 
3726     /* create i-array of B_oth */
3727     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3728     b_othi[0] = 0;
3729     len = 0; /* total length of j or a array to be received */
3730     k = 0;
3731     for (i=0; i<nrecvs; i++){
3732       rowlen = (PetscInt*)rvalues + rstarts[i];
3733       nrows = rstarts[i+1]-rstarts[i];
3734       for (j=0; j<nrows; j++) {
3735         b_othi[k+1] = b_othi[k] + rowlen[j];
3736         len += rowlen[j]; k++;
3737       }
3738       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3739     }
3740 
3741     /* allocate space for j and a arrrays of B_oth */
3742     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3743     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3744 
3745     /* j-array */
3746     /*---------*/
3747     /*  post receives of j-array */
3748     for (i=0; i<nrecvs; i++){
3749       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3750       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3751     }
3752     k = 0;
3753     for (i=0; i<nsends; i++){
3754       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3755       bufJ = bufj+sstartsj[i];
3756       for (j=0; j<nrows; j++) {
3757         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3758         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3759         for (l=0; l<ncols; l++){
3760           *bufJ++ = cols[l];
3761         }
3762         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3763       }
3764       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3765     }
3766 
3767     /* recvs and sends of j-array are completed */
3768     i = nrecvs;
3769     while (i--) {
3770       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3771     }
3772     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3773   } else if (scall == MAT_REUSE_MATRIX){
3774     sstartsj = *startsj;
3775     rstartsj = sstartsj + nsends +1;
3776     bufa     = *bufa_ptr;
3777     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3778     b_otha   = b_oth->a;
3779   } else {
3780     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
3781   }
3782 
3783   /* a-array */
3784   /*---------*/
3785   /*  post receives of a-array */
3786   for (i=0; i<nrecvs; i++){
3787     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3788     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3789   }
3790   k = 0;
3791   for (i=0; i<nsends; i++){
3792     nrows = sstarts[i+1]-sstarts[i];
3793     bufA = bufa+sstartsj[i];
3794     for (j=0; j<nrows; j++) {
3795       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3796       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3797       for (l=0; l<ncols; l++){
3798         *bufA++ = vals[l];
3799       }
3800       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3801 
3802     }
3803     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3804   }
3805   /* recvs and sends of a-array are completed */
3806   i = nrecvs;
3807   while (i--) {
3808     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3809   }
3810    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3811 
3812   if (scall == MAT_INITIAL_MATRIX){
3813     /* put together the new matrix */
3814     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3815 
3816     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3817     /* Since these are PETSc arrays, change flags to free them as necessary. */
3818     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3819     b_oth->freedata = PETSC_TRUE;
3820     b_oth->nonew    = 0;
3821 
3822     ierr = PetscFree(bufj);CHKERRQ(ierr);
3823     if (!startsj || !bufa_ptr){
3824       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3825       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3826     } else {
3827       *startsj  = sstartsj;
3828       *bufa_ptr = bufa;
3829     }
3830   }
3831   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3832 
3833   PetscFunctionReturn(0);
3834 }
3835 
3836 #undef __FUNCT__
3837 #define __FUNCT__ "MatGetCommunicationStructs"
3838 /*@C
3839   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
3840 
3841   Not Collective
3842 
3843   Input Parameters:
3844 . A - The matrix in mpiaij format
3845 
3846   Output Parameter:
3847 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
3848 . colmap - A map from global column index to local index into lvec
3849 - multScatter - A scatter from the argument of a matrix-vector product to lvec
3850 
3851   Level: developer
3852 
3853 @*/
3854 #if defined (PETSC_USE_CTABLE)
3855 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
3856 #else
3857 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
3858 #endif
3859 {
3860   Mat_MPIAIJ *a;
3861 
3862   PetscFunctionBegin;
3863   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
3864   PetscValidPointer(lvec, 2)
3865   PetscValidPointer(colmap, 3)
3866   PetscValidPointer(multScatter, 4)
3867   a = (Mat_MPIAIJ *) A->data;
3868   if (lvec) *lvec = a->lvec;
3869   if (colmap) *colmap = a->colmap;
3870   if (multScatter) *multScatter = a->Mvctx;
3871   PetscFunctionReturn(0);
3872 }
3873 
3874 /*MC
3875    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3876 
3877    Options Database Keys:
3878 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3879 
3880   Level: beginner
3881 
3882 .seealso: MatCreateMPIAIJ
3883 M*/
3884 
3885 EXTERN_C_BEGIN
3886 #undef __FUNCT__
3887 #define __FUNCT__ "MatCreate_MPIAIJ"
3888 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3889 {
3890   Mat_MPIAIJ     *b;
3891   PetscErrorCode ierr;
3892   PetscMPIInt    size;
3893 
3894   PetscFunctionBegin;
3895   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3896 
3897   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3898   B->data         = (void*)b;
3899   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3900   B->factor       = 0;
3901   B->rmap.bs      = 1;
3902   B->assembled    = PETSC_FALSE;
3903   B->mapping      = 0;
3904 
3905   B->insertmode      = NOT_SET_VALUES;
3906   b->size            = size;
3907   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3908 
3909   /* build cache for off array entries formed */
3910   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3911   b->donotstash  = PETSC_FALSE;
3912   b->colmap      = 0;
3913   b->garray      = 0;
3914   b->roworiented = PETSC_TRUE;
3915 
3916   /* stuff used for matrix vector multiply */
3917   b->lvec      = PETSC_NULL;
3918   b->Mvctx     = PETSC_NULL;
3919 
3920   /* stuff for MatGetRow() */
3921   b->rowindices   = 0;
3922   b->rowvalues    = 0;
3923   b->getrowactive = PETSC_FALSE;
3924 
3925 
3926   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3927                                      "MatStoreValues_MPIAIJ",
3928                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3929   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3930                                      "MatRetrieveValues_MPIAIJ",
3931                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3932   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3933 				     "MatGetDiagonalBlock_MPIAIJ",
3934                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3935   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3936 				     "MatIsTranspose_MPIAIJ",
3937 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3938   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3939 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3940 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3941   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3942 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3943 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3944   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3945 				     "MatDiagonalScaleLocal_MPIAIJ",
3946 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3947   PetscFunctionReturn(0);
3948 }
3949 EXTERN_C_END
3950 
3951 /*
3952     Special version for direct calls from Fortran
3953 */
3954 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3955 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
3956 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3957 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
3958 #endif
3959 
3960 /* Change these macros so can be used in void function */
3961 #undef CHKERRQ
3962 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
3963 #undef SETERRQ2
3964 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
3965 #undef SETERRQ
3966 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
3967 
3968 EXTERN_C_BEGIN
3969 #undef __FUNCT__
3970 #define __FUNCT__ "matsetvaluesmpiaij_"
3971 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv)
3972 {
3973   Mat            mat = *mmat;
3974   PetscInt       m = *mm, n = *mn;
3975   InsertMode     addv = *maddv;
3976   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3977   PetscScalar    value;
3978   PetscErrorCode ierr;
3979 
3980   MatPreallocated(mat);
3981   if (mat->insertmode == NOT_SET_VALUES) {
3982     mat->insertmode = addv;
3983   }
3984 #if defined(PETSC_USE_DEBUG)
3985   else if (mat->insertmode != addv) {
3986     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3987   }
3988 #endif
3989   {
3990   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
3991   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
3992   PetscTruth     roworiented = aij->roworiented;
3993 
3994   /* Some Variables required in the macro */
3995   Mat            A = aij->A;
3996   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
3997   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
3998   PetscScalar    *aa = a->a;
3999   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4000   Mat            B = aij->B;
4001   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4002   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4003   PetscScalar    *ba = b->a;
4004 
4005   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4006   PetscInt       nonew = a->nonew;
4007   PetscScalar    *ap1,*ap2;
4008 
4009   PetscFunctionBegin;
4010   for (i=0; i<m; i++) {
4011     if (im[i] < 0) continue;
4012 #if defined(PETSC_USE_DEBUG)
4013     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4014 #endif
4015     if (im[i] >= rstart && im[i] < rend) {
4016       row      = im[i] - rstart;
4017       lastcol1 = -1;
4018       rp1      = aj + ai[row];
4019       ap1      = aa + ai[row];
4020       rmax1    = aimax[row];
4021       nrow1    = ailen[row];
4022       low1     = 0;
4023       high1    = nrow1;
4024       lastcol2 = -1;
4025       rp2      = bj + bi[row];
4026       ap2      = ba + bi[row];
4027       rmax2    = bimax[row];
4028       nrow2    = bilen[row];
4029       low2     = 0;
4030       high2    = nrow2;
4031 
4032       for (j=0; j<n; j++) {
4033         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4034         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4035         if (in[j] >= cstart && in[j] < cend){
4036           col = in[j] - cstart;
4037           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4038         } else if (in[j] < 0) continue;
4039 #if defined(PETSC_USE_DEBUG)
4040         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4041 #endif
4042         else {
4043           if (mat->was_assembled) {
4044             if (!aij->colmap) {
4045               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4046             }
4047 #if defined (PETSC_USE_CTABLE)
4048             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4049 	    col--;
4050 #else
4051             col = aij->colmap[in[j]] - 1;
4052 #endif
4053             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4054               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4055               col =  in[j];
4056               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4057               B = aij->B;
4058               b = (Mat_SeqAIJ*)B->data;
4059               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4060               rp2      = bj + bi[row];
4061               ap2      = ba + bi[row];
4062               rmax2    = bimax[row];
4063               nrow2    = bilen[row];
4064               low2     = 0;
4065               high2    = nrow2;
4066               bm       = aij->B->rmap.n;
4067               ba = b->a;
4068             }
4069           } else col = in[j];
4070           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4071         }
4072       }
4073     } else {
4074       if (!aij->donotstash) {
4075         if (roworiented) {
4076           if (ignorezeroentries && v[i*n] == 0.0) continue;
4077           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4078         } else {
4079           if (ignorezeroentries && v[i] == 0.0) continue;
4080           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4081         }
4082       }
4083     }
4084   }}
4085   PetscFunctionReturnVoid();
4086 }
4087 EXTERN_C_END
4088