xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 3dfa136d8b789e6905fd401804d07a78b7147c92)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it it is not scalable (each processor
11 has an order N integer array but is fast to acess.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->cmap.n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->cmap.N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; high1++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; high2++;\
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
107 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
108 {
109   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
110   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
111   PetscErrorCode ierr;
112   PetscInt       l,*garray = mat->garray,diag;
113 
114   PetscFunctionBegin;
115   /* code only works for square matrices A */
116 
117   /* find size of row to the left of the diagonal part */
118   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
119   row  = row - diag;
120   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
121     if (garray[b->j[b->i[row]+l]] > diag) break;
122   }
123   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
124 
125   /* diagonal part */
126   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
127 
128   /* right of diagonal part */
129   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
130   PetscFunctionReturn(0);
131 }
132 
133 #undef __FUNCT__
134 #define __FUNCT__ "MatSetValues_MPIAIJ"
135 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
136 {
137   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
138   PetscScalar    value;
139   PetscErrorCode ierr;
140   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
141   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
142   PetscTruth     roworiented = aij->roworiented;
143 
144   /* Some Variables required in the macro */
145   Mat            A = aij->A;
146   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
147   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
148   PetscScalar    *aa = a->a;
149   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
150   Mat            B = aij->B;
151   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
152   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
153   PetscScalar    *ba = b->a;
154 
155   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
156   PetscInt       nonew = a->nonew;
157   PetscScalar    *ap1,*ap2;
158 
159   PetscFunctionBegin;
160   for (i=0; i<m; i++) {
161     if (im[i] < 0) continue;
162 #if defined(PETSC_USE_DEBUG)
163     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
164 #endif
165     if (im[i] >= rstart && im[i] < rend) {
166       row      = im[i] - rstart;
167       lastcol1 = -1;
168       rp1      = aj + ai[row];
169       ap1      = aa + ai[row];
170       rmax1    = aimax[row];
171       nrow1    = ailen[row];
172       low1     = 0;
173       high1    = nrow1;
174       lastcol2 = -1;
175       rp2      = bj + bi[row];
176       ap2      = ba + bi[row];
177       rmax2    = bimax[row];
178       nrow2    = bilen[row];
179       low2     = 0;
180       high2    = nrow2;
181 
182       for (j=0; j<n; j++) {
183         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
184         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
185         if (in[j] >= cstart && in[j] < cend){
186           col = in[j] - cstart;
187           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
188         } else if (in[j] < 0) continue;
189 #if defined(PETSC_USE_DEBUG)
190         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
191 #endif
192         else {
193           if (mat->was_assembled) {
194             if (!aij->colmap) {
195               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
196             }
197 #if defined (PETSC_USE_CTABLE)
198             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
199 	    col--;
200 #else
201             col = aij->colmap[in[j]] - 1;
202 #endif
203             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
204               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
205               col =  in[j];
206               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
207               B = aij->B;
208               b = (Mat_SeqAIJ*)B->data;
209               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
210               rp2      = bj + bi[row];
211               ap2      = ba + bi[row];
212               rmax2    = bimax[row];
213               nrow2    = bilen[row];
214               low2     = 0;
215               high2    = nrow2;
216               bm       = aij->B->rmap.n;
217               ba = b->a;
218             }
219           } else col = in[j];
220           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
221         }
222       }
223     } else {
224       if (!aij->donotstash) {
225         if (roworiented) {
226           if (ignorezeroentries && v[i*n] == 0.0) continue;
227           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
228         } else {
229           if (ignorezeroentries && v[i] == 0.0) continue;
230           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
231         }
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatGetValues_MPIAIJ"
241 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
242 {
243   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
244   PetscErrorCode ierr;
245   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
246   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
247 
248   PetscFunctionBegin;
249   for (i=0; i<m; i++) {
250     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
251     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
252     if (idxm[i] >= rstart && idxm[i] < rend) {
253       row = idxm[i] - rstart;
254       for (j=0; j<n; j++) {
255         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
256         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
257         if (idxn[j] >= cstart && idxn[j] < cend){
258           col = idxn[j] - cstart;
259           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
260         } else {
261           if (!aij->colmap) {
262             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
263           }
264 #if defined (PETSC_USE_CTABLE)
265           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
266           col --;
267 #else
268           col = aij->colmap[idxn[j]] - 1;
269 #endif
270           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
271           else {
272             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
273           }
274         }
275       }
276     } else {
277       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
278     }
279   }
280   PetscFunctionReturn(0);
281 }
282 
283 #undef __FUNCT__
284 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
285 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
286 {
287   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       nstash,reallocs;
290   InsertMode     addv;
291 
292   PetscFunctionBegin;
293   if (aij->donotstash) {
294     PetscFunctionReturn(0);
295   }
296 
297   /* make sure all processors are either in INSERTMODE or ADDMODE */
298   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
299   if (addv == (ADD_VALUES|INSERT_VALUES)) {
300     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
301   }
302   mat->insertmode = addv; /* in case this processor had no cache */
303 
304   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
305   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
306   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
307   PetscFunctionReturn(0);
308 }
309 
310 #undef __FUNCT__
311 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
312 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
313 {
314   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
315   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
316   PetscErrorCode ierr;
317   PetscMPIInt    n;
318   PetscInt       i,j,rstart,ncols,flg;
319   PetscInt       *row,*col,other_disassembled;
320   PetscScalar    *val;
321   InsertMode     addv = mat->insertmode;
322 
323   PetscFunctionBegin;
324   if (!aij->donotstash) {
325     while (1) {
326       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
327       if (!flg) break;
328 
329       for (i=0; i<n;) {
330         /* Now identify the consecutive vals belonging to the same row */
331         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
332         if (j < n) ncols = j-i;
333         else       ncols = n-i;
334         /* Now assemble all these values with a single function call */
335         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
336         i = j;
337       }
338     }
339     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
340   }
341   a->compressedrow.use     = PETSC_FALSE;
342   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
343   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
344 
345   /* determine if any processor has disassembled, if so we must
346      also disassemble ourselfs, in order that we may reassemble. */
347   /*
348      if nonzero structure of submatrix B cannot change then we know that
349      no processor disassembled thus we can skip this stuff
350   */
351   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
352     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
353     if (mat->was_assembled && !other_disassembled) {
354       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
355     }
356   }
357   /* reaccess the b because aij->B was changed in MatSetValues() or DisAssemble() */
358   b    = (Mat_SeqAIJ *)aij->B->data;
359 
360   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
361     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
362   }
363   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
364   b->compressedrow.use = PETSC_TRUE;
365   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
366   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
367 
368   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
369   aij->rowvalues = 0;
370 
371   /* used by MatAXPY() */
372   a->xtoy = 0; b->xtoy = 0;
373   a->XtoY = 0; b->XtoY = 0;
374 
375   PetscFunctionReturn(0);
376 }
377 
378 #undef __FUNCT__
379 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
380 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
381 {
382   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
383   PetscErrorCode ierr;
384 
385   PetscFunctionBegin;
386   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
387   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
388   PetscFunctionReturn(0);
389 }
390 
391 #undef __FUNCT__
392 #define __FUNCT__ "MatZeroRows_MPIAIJ"
393 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
394 {
395   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
396   PetscErrorCode ierr;
397   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
398   PetscInt       i,*owners = A->rmap.range;
399   PetscInt       *nprocs,j,idx,nsends,row;
400   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
401   PetscInt       *rvalues,count,base,slen,*source;
402   PetscInt       *lens,*lrows,*values,rstart=A->rmap.rstart;
403   MPI_Comm       comm = A->comm;
404   MPI_Request    *send_waits,*recv_waits;
405   MPI_Status     recv_status,*send_status;
406 #if defined(PETSC_DEBUG)
407   PetscTruth     found = PETSC_FALSE;
408 #endif
409 
410   PetscFunctionBegin;
411   /*  first count number of contributors to each processor */
412   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
413   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
414   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
415   j = 0;
416   for (i=0; i<N; i++) {
417     if (lastidx > (idx = rows[i])) j = 0;
418     lastidx = idx;
419     for (; j<size; j++) {
420       if (idx >= owners[j] && idx < owners[j+1]) {
421         nprocs[2*j]++;
422         nprocs[2*j+1] = 1;
423         owner[i] = j;
424 #if defined(PETSC_DEBUG)
425         found = PETSC_TRUE;
426 #endif
427         break;
428       }
429     }
430 #if defined(PETSC_DEBUG)
431     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
432     found = PETSC_FALSE;
433 #endif
434   }
435   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
436 
437   /* inform other processors of number of messages and max length*/
438   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
439 
440   /* post receives:   */
441   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
442   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
443   for (i=0; i<nrecvs; i++) {
444     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
445   }
446 
447   /* do sends:
448       1) starts[i] gives the starting index in svalues for stuff going to
449          the ith processor
450   */
451   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
452   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
453   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
454   starts[0] = 0;
455   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
456   for (i=0; i<N; i++) {
457     svalues[starts[owner[i]]++] = rows[i];
458   }
459 
460   starts[0] = 0;
461   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
462   count = 0;
463   for (i=0; i<size; i++) {
464     if (nprocs[2*i+1]) {
465       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
466     }
467   }
468   ierr = PetscFree(starts);CHKERRQ(ierr);
469 
470   base = owners[rank];
471 
472   /*  wait on receives */
473   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
474   source = lens + nrecvs;
475   count  = nrecvs; slen = 0;
476   while (count) {
477     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
478     /* unpack receives into our local space */
479     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
480     source[imdex]  = recv_status.MPI_SOURCE;
481     lens[imdex]    = n;
482     slen          += n;
483     count--;
484   }
485   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
486 
487   /* move the data into the send scatter */
488   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
489   count = 0;
490   for (i=0; i<nrecvs; i++) {
491     values = rvalues + i*nmax;
492     for (j=0; j<lens[i]; j++) {
493       lrows[count++] = values[j] - base;
494     }
495   }
496   ierr = PetscFree(rvalues);CHKERRQ(ierr);
497   ierr = PetscFree(lens);CHKERRQ(ierr);
498   ierr = PetscFree(owner);CHKERRQ(ierr);
499   ierr = PetscFree(nprocs);CHKERRQ(ierr);
500 
501   /* actually zap the local rows */
502   /*
503         Zero the required rows. If the "diagonal block" of the matrix
504      is square and the user wishes to set the diagonal we use separate
505      code so that MatSetValues() is not called for each diagonal allocating
506      new memory, thus calling lots of mallocs and slowing things down.
507 
508        Contributed by: Matthew Knepley
509   */
510   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
511   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
512   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
513     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
514   } else if (diag != 0.0) {
515     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
516     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
517       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
518 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
519     }
520     for (i = 0; i < slen; i++) {
521       row  = lrows[i] + rstart;
522       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
523     }
524     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
525     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
526   } else {
527     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
528   }
529   ierr = PetscFree(lrows);CHKERRQ(ierr);
530 
531   /* wait on sends */
532   if (nsends) {
533     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
534     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
535     ierr = PetscFree(send_status);CHKERRQ(ierr);
536   }
537   ierr = PetscFree(send_waits);CHKERRQ(ierr);
538   ierr = PetscFree(svalues);CHKERRQ(ierr);
539 
540   PetscFunctionReturn(0);
541 }
542 
543 #undef __FUNCT__
544 #define __FUNCT__ "MatMult_MPIAIJ"
545 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
546 {
547   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
548   PetscErrorCode ierr;
549   PetscInt       nt;
550 
551   PetscFunctionBegin;
552   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
553   if (nt != A->cmap.n) {
554     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap.n,nt);
555   }
556   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
558   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
559   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
560   PetscFunctionReturn(0);
561 }
562 
563 #undef __FUNCT__
564 #define __FUNCT__ "MatMultAdd_MPIAIJ"
565 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
566 {
567   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
568   PetscErrorCode ierr;
569 
570   PetscFunctionBegin;
571   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
572   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
573   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
574   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
580 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
581 {
582   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
583   PetscErrorCode ierr;
584   PetscTruth     merged;
585 
586   PetscFunctionBegin;
587   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
588   /* do nondiagonal part */
589   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
590   if (!merged) {
591     /* send it on its way */
592     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
593     /* do local part */
594     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
595     /* receive remote parts: note this assumes the values are not actually */
596     /* added in yy until the next line, */
597     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
598   } else {
599     /* do local part */
600     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
601     /* send it on its way */
602     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
603     /* values actually were received in the Begin() but we need to call this nop */
604     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
605   }
606   PetscFunctionReturn(0);
607 }
608 
609 EXTERN_C_BEGIN
610 #undef __FUNCT__
611 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
612 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
613 {
614   MPI_Comm       comm;
615   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
616   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
617   IS             Me,Notme;
618   PetscErrorCode ierr;
619   PetscInt       M,N,first,last,*notme,i;
620   PetscMPIInt    size;
621 
622   PetscFunctionBegin;
623 
624   /* Easy test: symmetric diagonal block */
625   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
626   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
627   if (!*f) PetscFunctionReturn(0);
628   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
629   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
630   if (size == 1) PetscFunctionReturn(0);
631 
632   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
633   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
634   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
635   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
636   for (i=0; i<first; i++) notme[i] = i;
637   for (i=last; i<M; i++) notme[i-last+first] = i;
638   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
639   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
640   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
641   Aoff = Aoffs[0];
642   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
643   Boff = Boffs[0];
644   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
645   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
646   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
647   ierr = ISDestroy(Me);CHKERRQ(ierr);
648   ierr = ISDestroy(Notme);CHKERRQ(ierr);
649 
650   PetscFunctionReturn(0);
651 }
652 EXTERN_C_END
653 
654 #undef __FUNCT__
655 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
656 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
657 {
658   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
659   PetscErrorCode ierr;
660 
661   PetscFunctionBegin;
662   /* do nondiagonal part */
663   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
664   /* send it on its way */
665   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
666   /* do local part */
667   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
668   /* receive remote parts */
669   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
670   PetscFunctionReturn(0);
671 }
672 
673 /*
674   This only works correctly for square matrices where the subblock A->A is the
675    diagonal block
676 */
677 #undef __FUNCT__
678 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
679 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
680 {
681   PetscErrorCode ierr;
682   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
683 
684   PetscFunctionBegin;
685   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
686   if (A->rmap.rstart != A->cmap.rstart || A->rmap.rend != A->cmap.rend) {
687     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
688   }
689   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
690   PetscFunctionReturn(0);
691 }
692 
693 #undef __FUNCT__
694 #define __FUNCT__ "MatScale_MPIAIJ"
695 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
696 {
697   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
698   PetscErrorCode ierr;
699 
700   PetscFunctionBegin;
701   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
702   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
703   PetscFunctionReturn(0);
704 }
705 
706 #undef __FUNCT__
707 #define __FUNCT__ "MatDestroy_MPIAIJ"
708 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
709 {
710   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
711   PetscErrorCode ierr;
712 
713   PetscFunctionBegin;
714 #if defined(PETSC_USE_LOG)
715   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap.N,mat->cmap.N);
716 #endif
717   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
718   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
719   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
720 #if defined (PETSC_USE_CTABLE)
721   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
722 #else
723   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
724 #endif
725   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
726   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
727   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
728   ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
729   ierr = PetscFree(aij);CHKERRQ(ierr);
730 
731   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
732   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
733   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
734   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
735   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
736   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
737   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatView_MPIAIJ_Binary"
743 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
744 {
745   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
746   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
747   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
750   int               fd;
751   PetscInt          nz,header[4],*row_lengths,*range=0,rlen,i;
752   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap.rstart,rnz;
753   PetscScalar       *column_values;
754 
755   PetscFunctionBegin;
756   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
757   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
758   nz   = A->nz + B->nz;
759   if (!rank) {
760     header[0] = MAT_FILE_COOKIE;
761     header[1] = mat->rmap.N;
762     header[2] = mat->cmap.N;
763     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
764     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
765     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
766     /* get largest number of rows any processor has */
767     rlen = mat->rmap.n;
768     range = mat->rmap.range;
769     for (i=1; i<size; i++) {
770       rlen = PetscMax(rlen,range[i+1] - range[i]);
771     }
772   } else {
773     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
774     rlen = mat->rmap.n;
775   }
776 
777   /* load up the local row counts */
778   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
779   for (i=0; i<mat->rmap.n; i++) {
780     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
781   }
782 
783   /* store the row lengths to the file */
784   if (!rank) {
785     MPI_Status status;
786     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap.n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
787     for (i=1; i<size; i++) {
788       rlen = range[i+1] - range[i];
789       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
790       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
791     }
792   } else {
793     ierr = MPI_Send(row_lengths,mat->rmap.n,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
794   }
795   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
796 
797   /* load up the local column indices */
798   nzmax = nz; /* )th processor needs space a largest processor needs */
799   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
800   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
801   cnt  = 0;
802   for (i=0; i<mat->rmap.n; i++) {
803     for (j=B->i[i]; j<B->i[i+1]; j++) {
804       if ( (col = garray[B->j[j]]) > cstart) break;
805       column_indices[cnt++] = col;
806     }
807     for (k=A->i[i]; k<A->i[i+1]; k++) {
808       column_indices[cnt++] = A->j[k] + cstart;
809     }
810     for (; j<B->i[i+1]; j++) {
811       column_indices[cnt++] = garray[B->j[j]];
812     }
813   }
814   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
815 
816   /* store the column indices to the file */
817   if (!rank) {
818     MPI_Status status;
819     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
820     for (i=1; i<size; i++) {
821       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
822       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
823       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
824       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
825     }
826   } else {
827     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
828     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
829   }
830   ierr = PetscFree(column_indices);CHKERRQ(ierr);
831 
832   /* load up the local column values */
833   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
834   cnt  = 0;
835   for (i=0; i<mat->rmap.n; i++) {
836     for (j=B->i[i]; j<B->i[i+1]; j++) {
837       if ( garray[B->j[j]] > cstart) break;
838       column_values[cnt++] = B->a[j];
839     }
840     for (k=A->i[i]; k<A->i[i+1]; k++) {
841       column_values[cnt++] = A->a[k];
842     }
843     for (; j<B->i[i+1]; j++) {
844       column_values[cnt++] = B->a[j];
845     }
846   }
847   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
848 
849   /* store the column values to the file */
850   if (!rank) {
851     MPI_Status status;
852     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
853     for (i=1; i<size; i++) {
854       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
855       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
856       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
857       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
858     }
859   } else {
860     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
861     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
862   }
863   ierr = PetscFree(column_values);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
869 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
870 {
871   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
872   PetscErrorCode    ierr;
873   PetscMPIInt       rank = aij->rank,size = aij->size;
874   PetscTruth        isdraw,iascii,isbinary;
875   PetscViewer       sviewer;
876   PetscViewerFormat format;
877 
878   PetscFunctionBegin;
879   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
880   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
881   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
882   if (iascii) {
883     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
884     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
885       MatInfo    info;
886       PetscTruth inodes;
887 
888       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
889       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
890       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
891       if (!inodes) {
892         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
893 					      rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
894       } else {
895         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
896 		    rank,mat->rmap.n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
897       }
898       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
899       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
900       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
901       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
902       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
903       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
904       PetscFunctionReturn(0);
905     } else if (format == PETSC_VIEWER_ASCII_INFO) {
906       PetscInt   inodecount,inodelimit,*inodes;
907       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
908       if (inodes) {
909         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
910       } else {
911         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
912       }
913       PetscFunctionReturn(0);
914     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
915       PetscFunctionReturn(0);
916     }
917   } else if (isbinary) {
918     if (size == 1) {
919       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
920       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
921     } else {
922       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
923     }
924     PetscFunctionReturn(0);
925   } else if (isdraw) {
926     PetscDraw  draw;
927     PetscTruth isnull;
928     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
929     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
930   }
931 
932   if (size == 1) {
933     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
934     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
935   } else {
936     /* assemble the entire matrix onto first processor. */
937     Mat         A;
938     Mat_SeqAIJ  *Aloc;
939     PetscInt    M = mat->rmap.N,N = mat->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
940     PetscScalar *a;
941 
942     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
943     if (!rank) {
944       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
945     } else {
946       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
947     }
948     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
949     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
950     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
951     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
952 
953     /* copy over the A part */
954     Aloc = (Mat_SeqAIJ*)aij->A->data;
955     m = aij->A->rmap.n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
956     row = mat->rmap.rstart;
957     for (i=0; i<ai[m]; i++) {aj[i] += mat->cmap.rstart ;}
958     for (i=0; i<m; i++) {
959       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
960       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
961     }
962     aj = Aloc->j;
963     for (i=0; i<ai[m]; i++) {aj[i] -= mat->cmap.rstart;}
964 
965     /* copy over the B part */
966     Aloc = (Mat_SeqAIJ*)aij->B->data;
967     m    = aij->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
968     row  = mat->rmap.rstart;
969     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
970     ct   = cols;
971     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
972     for (i=0; i<m; i++) {
973       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
974       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
975     }
976     ierr = PetscFree(ct);CHKERRQ(ierr);
977     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
978     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
979     /*
980        Everyone has to call to draw the matrix since the graphics waits are
981        synchronized across all processors that share the PetscDraw object
982     */
983     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
984     if (!rank) {
985       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
986       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
987     }
988     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
989     ierr = MatDestroy(A);CHKERRQ(ierr);
990   }
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatView_MPIAIJ"
996 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
997 {
998   PetscErrorCode ierr;
999   PetscTruth     iascii,isdraw,issocket,isbinary;
1000 
1001   PetscFunctionBegin;
1002   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1003   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1004   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1005   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1006   if (iascii || isdraw || isbinary || issocket) {
1007     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1008   } else {
1009     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1010   }
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 
1015 
1016 #undef __FUNCT__
1017 #define __FUNCT__ "MatRelax_MPIAIJ"
1018 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1019 {
1020   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1021   PetscErrorCode ierr;
1022   Vec            bb1;
1023 
1024   PetscFunctionBegin;
1025   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1026 
1027   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1028 
1029   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1030     if (flag & SOR_ZERO_INITIAL_GUESS) {
1031       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1032       its--;
1033     }
1034 
1035     while (its--) {
1036       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1037       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1038 
1039       /* update rhs: bb1 = bb - B*x */
1040       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1041       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1042 
1043       /* local sweep */
1044       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1045       CHKERRQ(ierr);
1046     }
1047   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1048     if (flag & SOR_ZERO_INITIAL_GUESS) {
1049       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1050       its--;
1051     }
1052     while (its--) {
1053       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1054       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1055 
1056       /* update rhs: bb1 = bb - B*x */
1057       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1058       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1059 
1060       /* local sweep */
1061       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1062       CHKERRQ(ierr);
1063     }
1064   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1065     if (flag & SOR_ZERO_INITIAL_GUESS) {
1066       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1067       its--;
1068     }
1069     while (its--) {
1070       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1071       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1072 
1073       /* update rhs: bb1 = bb - B*x */
1074       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1075       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1076 
1077       /* local sweep */
1078       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1079       CHKERRQ(ierr);
1080     }
1081   } else {
1082     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1083   }
1084 
1085   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatPermute_MPIAIJ"
1091 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1092 {
1093   MPI_Comm       comm,pcomm;
1094   PetscInt       first,local_size,nrows,*rows;
1095   int            ntids;
1096   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100   ierr = PetscObjectGetComm((PetscObject)A,&comm); CHKERRQ(ierr);
1101   /* make a collective version of 'rowp' */
1102   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm); CHKERRQ(ierr);
1103   if (pcomm==comm) {
1104     crowp = rowp;
1105   } else {
1106     ierr = ISGetSize(rowp,&nrows); CHKERRQ(ierr);
1107     ierr = ISGetIndices(rowp,&rows); CHKERRQ(ierr);
1108     ierr = ISCreateGeneral(comm,nrows,rows,&crowp); CHKERRQ(ierr);
1109     ierr = ISRestoreIndices(rowp,&rows); CHKERRQ(ierr);
1110   }
1111   /* collect the global row permutation and invert it */
1112   ierr = ISAllGather(crowp,&growp); CHKERRQ(ierr);
1113   ierr = ISSetPermutation(growp); CHKERRQ(ierr);
1114   if (pcomm!=comm) {
1115     ierr = ISDestroy(crowp); CHKERRQ(ierr);
1116   }
1117   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
1118   /* get the local target indices */
1119   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL); CHKERRQ(ierr);
1120   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL); CHKERRQ(ierr);
1121   ierr = ISGetIndices(irowp,&rows); CHKERRQ(ierr);
1122   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp); CHKERRQ(ierr);
1123   ierr = ISRestoreIndices(irowp,&rows); CHKERRQ(ierr);
1124   ierr = ISDestroy(irowp); CHKERRQ(ierr);
1125   /* the column permutation is so much easier;
1126      make a local version of 'colp' and invert it */
1127   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm); CHKERRQ(ierr);
1128   ierr = MPI_Comm_size(pcomm,&ntids); CHKERRQ(ierr);
1129   if (ntids==1) {
1130     lcolp = colp;
1131   } else {
1132     ierr = ISGetSize(colp,&nrows); CHKERRQ(ierr);
1133     ierr = ISGetIndices(colp,&rows); CHKERRQ(ierr);
1134     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp); CHKERRQ(ierr);
1135   }
1136   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp); CHKERRQ(ierr);
1137   ierr = ISSetPermutation(lcolp); CHKERRQ(ierr);
1138   if (ntids>1) {
1139     ierr = ISRestoreIndices(colp,&rows); CHKERRQ(ierr);
1140     ierr = ISDestroy(lcolp); CHKERRQ(ierr);
1141   }
1142   /* now we just get the submatrix */
1143   ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B); CHKERRQ(ierr);
1144   /* clean up */
1145   ierr = ISDestroy(lrowp); CHKERRQ(ierr);
1146   ierr = ISDestroy(icolp); CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1152 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1153 {
1154   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1155   Mat            A = mat->A,B = mat->B;
1156   PetscErrorCode ierr;
1157   PetscReal      isend[5],irecv[5];
1158 
1159   PetscFunctionBegin;
1160   info->block_size     = 1.0;
1161   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1162   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1163   isend[3] = info->memory;  isend[4] = info->mallocs;
1164   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1165   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1166   isend[3] += info->memory;  isend[4] += info->mallocs;
1167   if (flag == MAT_LOCAL) {
1168     info->nz_used      = isend[0];
1169     info->nz_allocated = isend[1];
1170     info->nz_unneeded  = isend[2];
1171     info->memory       = isend[3];
1172     info->mallocs      = isend[4];
1173   } else if (flag == MAT_GLOBAL_MAX) {
1174     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1175     info->nz_used      = irecv[0];
1176     info->nz_allocated = irecv[1];
1177     info->nz_unneeded  = irecv[2];
1178     info->memory       = irecv[3];
1179     info->mallocs      = irecv[4];
1180   } else if (flag == MAT_GLOBAL_SUM) {
1181     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1182     info->nz_used      = irecv[0];
1183     info->nz_allocated = irecv[1];
1184     info->nz_unneeded  = irecv[2];
1185     info->memory       = irecv[3];
1186     info->mallocs      = irecv[4];
1187   }
1188   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1189   info->fill_ratio_needed = 0;
1190   info->factor_mallocs    = 0;
1191   info->rows_global       = (double)matin->rmap.N;
1192   info->columns_global    = (double)matin->cmap.N;
1193   info->rows_local        = (double)matin->rmap.n;
1194   info->columns_local     = (double)matin->cmap.N;
1195 
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatSetOption_MPIAIJ"
1201 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1202 {
1203   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   switch (op) {
1208   case MAT_NO_NEW_NONZERO_LOCATIONS:
1209   case MAT_YES_NEW_NONZERO_LOCATIONS:
1210   case MAT_COLUMNS_UNSORTED:
1211   case MAT_COLUMNS_SORTED:
1212   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1213   case MAT_KEEP_ZEROED_ROWS:
1214   case MAT_NEW_NONZERO_LOCATION_ERR:
1215   case MAT_USE_INODES:
1216   case MAT_DO_NOT_USE_INODES:
1217   case MAT_IGNORE_ZERO_ENTRIES:
1218     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1219     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1220     break;
1221   case MAT_ROW_ORIENTED:
1222     a->roworiented = PETSC_TRUE;
1223     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1224     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1225     break;
1226   case MAT_ROWS_SORTED:
1227   case MAT_ROWS_UNSORTED:
1228   case MAT_YES_NEW_DIAGONALS:
1229     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1230     break;
1231   case MAT_COLUMN_ORIENTED:
1232     a->roworiented = PETSC_FALSE;
1233     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1234     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1235     break;
1236   case MAT_IGNORE_OFF_PROC_ENTRIES:
1237     a->donotstash = PETSC_TRUE;
1238     break;
1239   case MAT_NO_NEW_DIAGONALS:
1240     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1241   case MAT_SYMMETRIC:
1242   case MAT_STRUCTURALLY_SYMMETRIC:
1243   case MAT_HERMITIAN:
1244   case MAT_SYMMETRY_ETERNAL:
1245     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1246     break;
1247   case MAT_NOT_SYMMETRIC:
1248   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1249   case MAT_NOT_HERMITIAN:
1250   case MAT_NOT_SYMMETRY_ETERNAL:
1251     break;
1252   default:
1253     SETERRQ(PETSC_ERR_SUP,"unknown option");
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatGetRow_MPIAIJ"
1260 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261 {
1262   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1263   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1264   PetscErrorCode ierr;
1265   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap.rstart;
1266   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap.rstart,rend = matin->rmap.rend;
1267   PetscInt       *cmap,*idx_p;
1268 
1269   PetscFunctionBegin;
1270   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271   mat->getrowactive = PETSC_TRUE;
1272 
1273   if (!mat->rowvalues && (idx || v)) {
1274     /*
1275         allocate enough space to hold information from the longest row.
1276     */
1277     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1278     PetscInt     max = 1,tmp;
1279     for (i=0; i<matin->rmap.n; i++) {
1280       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1281       if (max < tmp) { max = tmp; }
1282     }
1283     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1284     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1285   }
1286 
1287   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1288   lrow = row - rstart;
1289 
1290   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291   if (!v)   {pvA = 0; pvB = 0;}
1292   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1294   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1295   nztot = nzA + nzB;
1296 
1297   cmap  = mat->garray;
1298   if (v  || idx) {
1299     if (nztot) {
1300       /* Sort by increasing column numbers, assuming A and B already sorted */
1301       PetscInt imark = -1;
1302       if (v) {
1303         *v = v_p = mat->rowvalues;
1304         for (i=0; i<nzB; i++) {
1305           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1306           else break;
1307         }
1308         imark = i;
1309         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1310         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1311       }
1312       if (idx) {
1313         *idx = idx_p = mat->rowindices;
1314         if (imark > -1) {
1315           for (i=0; i<imark; i++) {
1316             idx_p[i] = cmap[cworkB[i]];
1317           }
1318         } else {
1319           for (i=0; i<nzB; i++) {
1320             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1321             else break;
1322           }
1323           imark = i;
1324         }
1325         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1326         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1327       }
1328     } else {
1329       if (idx) *idx = 0;
1330       if (v)   *v   = 0;
1331     }
1332   }
1333   *nz = nztot;
1334   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1335   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1341 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1344 
1345   PetscFunctionBegin;
1346   if (!aij->getrowactive) {
1347     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1348   }
1349   aij->getrowactive = PETSC_FALSE;
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatNorm_MPIAIJ"
1355 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1356 {
1357   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1358   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1359   PetscErrorCode ierr;
1360   PetscInt       i,j,cstart = mat->cmap.rstart;
1361   PetscReal      sum = 0.0;
1362   PetscScalar    *v;
1363 
1364   PetscFunctionBegin;
1365   if (aij->size == 1) {
1366     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1367   } else {
1368     if (type == NORM_FROBENIUS) {
1369       v = amat->a;
1370       for (i=0; i<amat->nz; i++) {
1371 #if defined(PETSC_USE_COMPLEX)
1372         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1373 #else
1374         sum += (*v)*(*v); v++;
1375 #endif
1376       }
1377       v = bmat->a;
1378       for (i=0; i<bmat->nz; i++) {
1379 #if defined(PETSC_USE_COMPLEX)
1380         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1381 #else
1382         sum += (*v)*(*v); v++;
1383 #endif
1384       }
1385       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1386       *norm = sqrt(*norm);
1387     } else if (type == NORM_1) { /* max column norm */
1388       PetscReal *tmp,*tmp2;
1389       PetscInt    *jj,*garray = aij->garray;
1390       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1391       ierr = PetscMalloc((mat->cmap.N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1392       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
1393       *norm = 0.0;
1394       v = amat->a; jj = amat->j;
1395       for (j=0; j<amat->nz; j++) {
1396         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1397       }
1398       v = bmat->a; jj = bmat->j;
1399       for (j=0; j<bmat->nz; j++) {
1400         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1401       }
1402       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1403       for (j=0; j<mat->cmap.N; j++) {
1404         if (tmp2[j] > *norm) *norm = tmp2[j];
1405       }
1406       ierr = PetscFree(tmp);CHKERRQ(ierr);
1407       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1408     } else if (type == NORM_INFINITY) { /* max row norm */
1409       PetscReal ntemp = 0.0;
1410       for (j=0; j<aij->A->rmap.n; j++) {
1411         v = amat->a + amat->i[j];
1412         sum = 0.0;
1413         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1414           sum += PetscAbsScalar(*v); v++;
1415         }
1416         v = bmat->a + bmat->i[j];
1417         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1418           sum += PetscAbsScalar(*v); v++;
1419         }
1420         if (sum > ntemp) ntemp = sum;
1421       }
1422       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1423     } else {
1424       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1425     }
1426   }
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatTranspose_MPIAIJ"
1432 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1433 {
1434   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1435   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1436   PetscErrorCode ierr;
1437   PetscInt       M = A->rmap.N,N = A->cmap.N,m,*ai,*aj,row,*cols,i,*ct;
1438   Mat            B;
1439   PetscScalar    *array;
1440 
1441   PetscFunctionBegin;
1442   if (!matout && M != N) {
1443     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1444   }
1445 
1446   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1447   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1448   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1449   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1450 
1451   /* copy over the A part */
1452   Aloc = (Mat_SeqAIJ*)a->A->data;
1453   m = a->A->rmap.n; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1454   row = A->rmap.rstart;
1455   for (i=0; i<ai[m]; i++) {aj[i] += A->cmap.rstart ;}
1456   for (i=0; i<m; i++) {
1457     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1458     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1459   }
1460   aj = Aloc->j;
1461   for (i=0; i<ai[m]; i++) {aj[i] -= A->cmap.rstart ;}
1462 
1463   /* copy over the B part */
1464   Aloc = (Mat_SeqAIJ*)a->B->data;
1465   m = a->B->rmap.n;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1466   row  = A->rmap.rstart;
1467   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1468   ct   = cols;
1469   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1470   for (i=0; i<m; i++) {
1471     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1472     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1473   }
1474   ierr = PetscFree(ct);CHKERRQ(ierr);
1475   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1476   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1477   if (matout) {
1478     *matout = B;
1479   } else {
1480     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1487 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1488 {
1489   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1490   Mat            a = aij->A,b = aij->B;
1491   PetscErrorCode ierr;
1492   PetscInt       s1,s2,s3;
1493 
1494   PetscFunctionBegin;
1495   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1496   if (rr) {
1497     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1498     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1499     /* Overlap communication with computation. */
1500     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1501   }
1502   if (ll) {
1503     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1504     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1505     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1506   }
1507   /* scale  the diagonal block */
1508   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1509 
1510   if (rr) {
1511     /* Do a scatter end and then right scale the off-diagonal block */
1512     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1513     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1514   }
1515 
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1522 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1523 {
1524   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   if (!a->rank) {
1529     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1530   }
1531   PetscFunctionReturn(0);
1532 }
1533 
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1536 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1537 {
1538   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1539   PetscErrorCode ierr;
1540 
1541   PetscFunctionBegin;
1542   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1543   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1544   PetscFunctionReturn(0);
1545 }
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1548 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1549 {
1550   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatEqual_MPIAIJ"
1560 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1561 {
1562   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1563   Mat            a,b,c,d;
1564   PetscTruth     flg;
1565   PetscErrorCode ierr;
1566 
1567   PetscFunctionBegin;
1568   a = matA->A; b = matA->B;
1569   c = matB->A; d = matB->B;
1570 
1571   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1572   if (flg) {
1573     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1574   }
1575   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatCopy_MPIAIJ"
1581 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1582 {
1583   PetscErrorCode ierr;
1584   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1585   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1586 
1587   PetscFunctionBegin;
1588   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1589   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1590     /* because of the column compression in the off-processor part of the matrix a->B,
1591        the number of columns in a->B and b->B may be different, hence we cannot call
1592        the MatCopy() directly on the two parts. If need be, we can provide a more
1593        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1594        then copying the submatrices */
1595     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1596   } else {
1597     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1598     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1599   }
1600   PetscFunctionReturn(0);
1601 }
1602 
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1605 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1606 {
1607   PetscErrorCode ierr;
1608 
1609   PetscFunctionBegin;
1610   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 #include "petscblaslapack.h"
1615 #undef __FUNCT__
1616 #define __FUNCT__ "MatAXPY_MPIAIJ"
1617 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1618 {
1619   PetscErrorCode ierr;
1620   PetscInt       i;
1621   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1622   PetscBLASInt   bnz,one=1;
1623   Mat_SeqAIJ     *x,*y;
1624 
1625   PetscFunctionBegin;
1626   if (str == SAME_NONZERO_PATTERN) {
1627     PetscScalar alpha = a;
1628     x = (Mat_SeqAIJ *)xx->A->data;
1629     y = (Mat_SeqAIJ *)yy->A->data;
1630     bnz = (PetscBLASInt)x->nz;
1631     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1632     x = (Mat_SeqAIJ *)xx->B->data;
1633     y = (Mat_SeqAIJ *)yy->B->data;
1634     bnz = (PetscBLASInt)x->nz;
1635     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1636   } else if (str == SUBSET_NONZERO_PATTERN) {
1637     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1638 
1639     x = (Mat_SeqAIJ *)xx->B->data;
1640     y = (Mat_SeqAIJ *)yy->B->data;
1641     if (y->xtoy && y->XtoY != xx->B) {
1642       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1643       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1644     }
1645     if (!y->xtoy) { /* get xtoy */
1646       ierr = MatAXPYGetxtoy_Private(xx->B->rmap.n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1647       y->XtoY = xx->B;
1648     }
1649     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1650   } else {
1651     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1652   }
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1657 
1658 #undef __FUNCT__
1659 #define __FUNCT__ "MatConjugate_MPIAIJ"
1660 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1661 {
1662 #if defined(PETSC_USE_COMPLEX)
1663   PetscErrorCode ierr;
1664   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1668   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1669 #else
1670   PetscFunctionBegin;
1671 #endif
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatRealPart_MPIAIJ"
1677 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1678 {
1679   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1680   PetscErrorCode ierr;
1681 
1682   PetscFunctionBegin;
1683   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1684   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1690 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1691 {
1692   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1693   PetscErrorCode ierr;
1694 
1695   PetscFunctionBegin;
1696   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1697   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 /* -------------------------------------------------------------------*/
1702 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1703        MatGetRow_MPIAIJ,
1704        MatRestoreRow_MPIAIJ,
1705        MatMult_MPIAIJ,
1706 /* 4*/ MatMultAdd_MPIAIJ,
1707        MatMultTranspose_MPIAIJ,
1708        MatMultTransposeAdd_MPIAIJ,
1709        0,
1710        0,
1711        0,
1712 /*10*/ 0,
1713        0,
1714        0,
1715        MatRelax_MPIAIJ,
1716        MatTranspose_MPIAIJ,
1717 /*15*/ MatGetInfo_MPIAIJ,
1718        MatEqual_MPIAIJ,
1719        MatGetDiagonal_MPIAIJ,
1720        MatDiagonalScale_MPIAIJ,
1721        MatNorm_MPIAIJ,
1722 /*20*/ MatAssemblyBegin_MPIAIJ,
1723        MatAssemblyEnd_MPIAIJ,
1724        0,
1725        MatSetOption_MPIAIJ,
1726        MatZeroEntries_MPIAIJ,
1727 /*25*/ MatZeroRows_MPIAIJ,
1728        0,
1729        0,
1730        0,
1731        0,
1732 /*30*/ MatSetUpPreallocation_MPIAIJ,
1733        0,
1734        0,
1735        0,
1736        0,
1737 /*35*/ MatDuplicate_MPIAIJ,
1738        0,
1739        0,
1740        0,
1741        0,
1742 /*40*/ MatAXPY_MPIAIJ,
1743        MatGetSubMatrices_MPIAIJ,
1744        MatIncreaseOverlap_MPIAIJ,
1745        MatGetValues_MPIAIJ,
1746        MatCopy_MPIAIJ,
1747 /*45*/ MatPrintHelp_MPIAIJ,
1748        MatScale_MPIAIJ,
1749        0,
1750        0,
1751        0,
1752 /*50*/ MatSetBlockSize_MPIAIJ,
1753        0,
1754        0,
1755        0,
1756        0,
1757 /*55*/ MatFDColoringCreate_MPIAIJ,
1758        0,
1759        MatSetUnfactored_MPIAIJ,
1760        MatPermute_MPIAIJ,
1761        0,
1762 /*60*/ MatGetSubMatrix_MPIAIJ,
1763        MatDestroy_MPIAIJ,
1764        MatView_MPIAIJ,
1765        0,
1766        0,
1767 /*65*/ 0,
1768        0,
1769        0,
1770        0,
1771        0,
1772 /*70*/ 0,
1773        0,
1774        MatSetColoring_MPIAIJ,
1775 #if defined(PETSC_HAVE_ADIC)
1776        MatSetValuesAdic_MPIAIJ,
1777 #else
1778        0,
1779 #endif
1780        MatSetValuesAdifor_MPIAIJ,
1781 /*75*/ 0,
1782        0,
1783        0,
1784        0,
1785        0,
1786 /*80*/ 0,
1787        0,
1788        0,
1789        0,
1790 /*84*/ MatLoad_MPIAIJ,
1791        0,
1792        0,
1793        0,
1794        0,
1795        0,
1796 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1797        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1798        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1799        MatPtAP_Basic,
1800        MatPtAPSymbolic_MPIAIJ,
1801 /*95*/ MatPtAPNumeric_MPIAIJ,
1802        0,
1803        0,
1804        0,
1805        0,
1806 /*100*/0,
1807        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1808        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1809        MatConjugate_MPIAIJ,
1810        0,
1811 /*105*/MatSetValuesRow_MPIAIJ,
1812        MatRealPart_MPIAIJ,
1813        MatImaginaryPart_MPIAIJ};
1814 
1815 /* ----------------------------------------------------------------------------------------*/
1816 
1817 EXTERN_C_BEGIN
1818 #undef __FUNCT__
1819 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1820 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1821 {
1822   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1823   PetscErrorCode ierr;
1824 
1825   PetscFunctionBegin;
1826   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1827   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1828   PetscFunctionReturn(0);
1829 }
1830 EXTERN_C_END
1831 
1832 EXTERN_C_BEGIN
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1835 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1836 {
1837   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1838   PetscErrorCode ierr;
1839 
1840   PetscFunctionBegin;
1841   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1842   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1843   PetscFunctionReturn(0);
1844 }
1845 EXTERN_C_END
1846 
1847 #include "petscpc.h"
1848 EXTERN_C_BEGIN
1849 #undef __FUNCT__
1850 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1851 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1852 {
1853   Mat_MPIAIJ     *b;
1854   PetscErrorCode ierr;
1855   PetscInt       i;
1856 
1857   PetscFunctionBegin;
1858   B->preallocated = PETSC_TRUE;
1859   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1860   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1861   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1862   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1863 
1864   B->rmap.bs = B->cmap.bs = 1;
1865   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1866   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1867   if (d_nnz) {
1868     for (i=0; i<B->rmap.n; i++) {
1869       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1870     }
1871   }
1872   if (o_nnz) {
1873     for (i=0; i<B->rmap.n; i++) {
1874       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1875     }
1876   }
1877   b = (Mat_MPIAIJ*)B->data;
1878 
1879   /* Explicitly create 2 MATSEQAIJ matrices. */
1880   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1881   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1882   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
1883   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1884   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1885   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1886   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
1887   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1888 
1889   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1890   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1891 
1892   PetscFunctionReturn(0);
1893 }
1894 EXTERN_C_END
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1898 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1899 {
1900   Mat            mat;
1901   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   *newmat       = 0;
1906   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1907   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
1908   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1909   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1910   a    = (Mat_MPIAIJ*)mat->data;
1911 
1912   mat->factor       = matin->factor;
1913   mat->rmap.bs      = matin->rmap.bs;
1914   mat->assembled    = PETSC_TRUE;
1915   mat->insertmode   = NOT_SET_VALUES;
1916   mat->preallocated = PETSC_TRUE;
1917 
1918   a->size           = oldmat->size;
1919   a->rank           = oldmat->rank;
1920   a->donotstash     = oldmat->donotstash;
1921   a->roworiented    = oldmat->roworiented;
1922   a->rowindices     = 0;
1923   a->rowvalues      = 0;
1924   a->getrowactive   = PETSC_FALSE;
1925 
1926   ierr = PetscMapCopy(mat->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
1927   ierr = PetscMapCopy(mat->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
1928 
1929   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1930   if (oldmat->colmap) {
1931 #if defined (PETSC_USE_CTABLE)
1932     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1933 #else
1934     ierr = PetscMalloc((mat->cmap.N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1935     ierr = PetscLogObjectMemory(mat,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1936     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap.N)*sizeof(PetscInt));CHKERRQ(ierr);
1937 #endif
1938   } else a->colmap = 0;
1939   if (oldmat->garray) {
1940     PetscInt len;
1941     len  = oldmat->B->cmap.n;
1942     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1943     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1944     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1945   } else a->garray = 0;
1946 
1947   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1948   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1949   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1950   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1951   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1952   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1953   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1954   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1955   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1956   *newmat = mat;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #include "petscsys.h"
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatLoad_MPIAIJ"
1964 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1965 {
1966   Mat            A;
1967   PetscScalar    *vals,*svals;
1968   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1969   MPI_Status     status;
1970   PetscErrorCode ierr;
1971   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1972   PetscInt       i,nz,j,rstart,rend,mmax;
1973   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1974   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1975   PetscInt       cend,cstart,n,*rowners;
1976   int            fd;
1977 
1978   PetscFunctionBegin;
1979   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1980   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1981   if (!rank) {
1982     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1983     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1984     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1985   }
1986 
1987   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1988   M = header[1]; N = header[2];
1989   /* determine ownership of all rows */
1990   m    = M/size + ((M % size) > rank);
1991   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1992   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1993 
1994   /* First process needs enough room for process with most rows */
1995   if (!rank) {
1996     mmax       = rowners[1];
1997     for (i=2; i<size; i++) {
1998       mmax = PetscMax(mmax,rowners[i]);
1999     }
2000   } else mmax = m;
2001 
2002   rowners[0] = 0;
2003   for (i=2; i<=size; i++) {
2004     mmax       = PetscMax(mmax,rowners[i]);
2005     rowners[i] += rowners[i-1];
2006   }
2007   rstart = rowners[rank];
2008   rend   = rowners[rank+1];
2009 
2010   /* distribute row lengths to all processors */
2011   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2012   if (!rank) {
2013     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2014     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2015     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2016     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2017     for (j=0; j<m; j++) {
2018       procsnz[0] += ourlens[j];
2019     }
2020     for (i=1; i<size; i++) {
2021       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2022       /* calculate the number of nonzeros on each processor */
2023       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2024         procsnz[i] += rowlengths[j];
2025       }
2026       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2027     }
2028     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2029   } else {
2030     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2031   }
2032 
2033   if (!rank) {
2034     /* determine max buffer needed and allocate it */
2035     maxnz = 0;
2036     for (i=0; i<size; i++) {
2037       maxnz = PetscMax(maxnz,procsnz[i]);
2038     }
2039     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2040 
2041     /* read in my part of the matrix column indices  */
2042     nz   = procsnz[0];
2043     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2044     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2045 
2046     /* read in every one elses and ship off */
2047     for (i=1; i<size; i++) {
2048       nz   = procsnz[i];
2049       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2050       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2051     }
2052     ierr = PetscFree(cols);CHKERRQ(ierr);
2053   } else {
2054     /* determine buffer space needed for message */
2055     nz = 0;
2056     for (i=0; i<m; i++) {
2057       nz += ourlens[i];
2058     }
2059     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2060 
2061     /* receive message of column indices*/
2062     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2063     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2064     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2065   }
2066 
2067   /* determine column ownership if matrix is not square */
2068   if (N != M) {
2069     n      = N/size + ((N % size) > rank);
2070     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2071     cstart = cend - n;
2072   } else {
2073     cstart = rstart;
2074     cend   = rend;
2075     n      = cend - cstart;
2076   }
2077 
2078   /* loop over local rows, determining number of off diagonal entries */
2079   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2080   jj = 0;
2081   for (i=0; i<m; i++) {
2082     for (j=0; j<ourlens[i]; j++) {
2083       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2084       jj++;
2085     }
2086   }
2087 
2088   /* create our matrix */
2089   for (i=0; i<m; i++) {
2090     ourlens[i] -= offlens[i];
2091   }
2092   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2093   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2094   ierr = MatSetType(A,type);CHKERRQ(ierr);
2095   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2096 
2097   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2098   for (i=0; i<m; i++) {
2099     ourlens[i] += offlens[i];
2100   }
2101 
2102   if (!rank) {
2103     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2104 
2105     /* read in my part of the matrix numerical values  */
2106     nz   = procsnz[0];
2107     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2108 
2109     /* insert into matrix */
2110     jj      = rstart;
2111     smycols = mycols;
2112     svals   = vals;
2113     for (i=0; i<m; i++) {
2114       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2115       smycols += ourlens[i];
2116       svals   += ourlens[i];
2117       jj++;
2118     }
2119 
2120     /* read in other processors and ship out */
2121     for (i=1; i<size; i++) {
2122       nz   = procsnz[i];
2123       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2124       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2125     }
2126     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2127   } else {
2128     /* receive numeric values */
2129     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2130 
2131     /* receive message of values*/
2132     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2133     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2134     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2135 
2136     /* insert into matrix */
2137     jj      = rstart;
2138     smycols = mycols;
2139     svals   = vals;
2140     for (i=0; i<m; i++) {
2141       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2142       smycols += ourlens[i];
2143       svals   += ourlens[i];
2144       jj++;
2145     }
2146   }
2147   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2148   ierr = PetscFree(vals);CHKERRQ(ierr);
2149   ierr = PetscFree(mycols);CHKERRQ(ierr);
2150   ierr = PetscFree(rowners);CHKERRQ(ierr);
2151 
2152   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2153   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2154   *newmat = A;
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2160 /*
2161     Not great since it makes two copies of the submatrix, first an SeqAIJ
2162   in local and then by concatenating the local matrices the end result.
2163   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2164 */
2165 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2166 {
2167   PetscErrorCode ierr;
2168   PetscMPIInt    rank,size;
2169   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2170   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2171   Mat            *local,M,Mreuse;
2172   PetscScalar    *vwork,*aa;
2173   MPI_Comm       comm = mat->comm;
2174   Mat_SeqAIJ     *aij;
2175 
2176 
2177   PetscFunctionBegin;
2178   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2179   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2180 
2181   if (call ==  MAT_REUSE_MATRIX) {
2182     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2183     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2184     local = &Mreuse;
2185     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2186   } else {
2187     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2188     Mreuse = *local;
2189     ierr   = PetscFree(local);CHKERRQ(ierr);
2190   }
2191 
2192   /*
2193       m - number of local rows
2194       n - number of columns (same on all processors)
2195       rstart - first row in new global matrix generated
2196   */
2197   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2198   if (call == MAT_INITIAL_MATRIX) {
2199     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2200     ii  = aij->i;
2201     jj  = aij->j;
2202 
2203     /*
2204         Determine the number of non-zeros in the diagonal and off-diagonal
2205         portions of the matrix in order to do correct preallocation
2206     */
2207 
2208     /* first get start and end of "diagonal" columns */
2209     if (csize == PETSC_DECIDE) {
2210       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2211       if (mglobal == n) { /* square matrix */
2212 	nlocal = m;
2213       } else {
2214         nlocal = n/size + ((n % size) > rank);
2215       }
2216     } else {
2217       nlocal = csize;
2218     }
2219     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2220     rstart = rend - nlocal;
2221     if (rank == size - 1 && rend != n) {
2222       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2223     }
2224 
2225     /* next, compute all the lengths */
2226     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2227     olens = dlens + m;
2228     for (i=0; i<m; i++) {
2229       jend = ii[i+1] - ii[i];
2230       olen = 0;
2231       dlen = 0;
2232       for (j=0; j<jend; j++) {
2233         if (*jj < rstart || *jj >= rend) olen++;
2234         else dlen++;
2235         jj++;
2236       }
2237       olens[i] = olen;
2238       dlens[i] = dlen;
2239     }
2240     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2241     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2242     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2243     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2244     ierr = PetscFree(dlens);CHKERRQ(ierr);
2245   } else {
2246     PetscInt ml,nl;
2247 
2248     M = *newmat;
2249     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2250     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2251     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2252     /*
2253          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2254        rather than the slower MatSetValues().
2255     */
2256     M->was_assembled = PETSC_TRUE;
2257     M->assembled     = PETSC_FALSE;
2258   }
2259   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2260   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2261   ii  = aij->i;
2262   jj  = aij->j;
2263   aa  = aij->a;
2264   for (i=0; i<m; i++) {
2265     row   = rstart + i;
2266     nz    = ii[i+1] - ii[i];
2267     cwork = jj;     jj += nz;
2268     vwork = aa;     aa += nz;
2269     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2270   }
2271 
2272   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2273   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2274   *newmat = M;
2275 
2276   /* save submatrix used in processor for next request */
2277   if (call ==  MAT_INITIAL_MATRIX) {
2278     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2279     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2280   }
2281 
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 EXTERN_C_BEGIN
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2288 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2289 {
2290   PetscInt       m,cstart, cend,j,nnz,i,d;
2291   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
2292   const PetscInt *JJ;
2293   PetscScalar    *values;
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0 it is %D",I[0]);
2298 
2299   B->rmap.bs = B->cmap.bs = 1;
2300   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2301   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2302   m      = B->rmap.n;
2303   cstart = B->cmap.rstart;
2304   cend   = B->cmap.rend;
2305   rstart = B->rmap.rstart;
2306 
2307   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2308   o_nnz = d_nnz + m;
2309 
2310   for (i=0; i<m; i++) {
2311     nnz     = I[i+1]- I[i];
2312     JJ      = J + I[i];
2313     nnz_max = PetscMax(nnz_max,nnz);
2314     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
2315     for (j=0; j<nnz; j++) {
2316       if (*JJ >= cstart) break;
2317       JJ++;
2318     }
2319     d = 0;
2320     for (; j<nnz; j++) {
2321       if (*JJ++ >= cend) break;
2322       d++;
2323     }
2324     d_nnz[i] = d;
2325     o_nnz[i] = nnz - d;
2326   }
2327   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2328   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2329 
2330   if (v) values = (PetscScalar*)v;
2331   else {
2332     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2333     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2334   }
2335 
2336   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2337   for (i=0; i<m; i++) {
2338     ii   = i + rstart;
2339     nnz  = I[i+1]- I[i];
2340     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2341   }
2342   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2343   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2344   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2345 
2346   if (!v) {
2347     ierr = PetscFree(values);CHKERRQ(ierr);
2348   }
2349   PetscFunctionReturn(0);
2350 }
2351 EXTERN_C_END
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2355 /*@
2356    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2357    (the default parallel PETSc format).
2358 
2359    Collective on MPI_Comm
2360 
2361    Input Parameters:
2362 +  B - the matrix
2363 .  i - the indices into j for the start of each local row (starts with zero)
2364 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2365 -  v - optional values in the matrix
2366 
2367    Level: developer
2368 
2369 .keywords: matrix, aij, compressed row, sparse, parallel
2370 
2371 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2372 @*/
2373 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2374 {
2375   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2376 
2377   PetscFunctionBegin;
2378   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2379   if (f) {
2380     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2381   }
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 #undef __FUNCT__
2386 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2387 /*@C
2388    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2389    (the default parallel PETSc format).  For good matrix assembly performance
2390    the user should preallocate the matrix storage by setting the parameters
2391    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2392    performance can be increased by more than a factor of 50.
2393 
2394    Collective on MPI_Comm
2395 
2396    Input Parameters:
2397 +  A - the matrix
2398 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2399            (same value is used for all local rows)
2400 .  d_nnz - array containing the number of nonzeros in the various rows of the
2401            DIAGONAL portion of the local submatrix (possibly different for each row)
2402            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2403            The size of this array is equal to the number of local rows, i.e 'm'.
2404            You must leave room for the diagonal entry even if it is zero.
2405 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2406            submatrix (same value is used for all local rows).
2407 -  o_nnz - array containing the number of nonzeros in the various rows of the
2408            OFF-DIAGONAL portion of the local submatrix (possibly different for
2409            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2410            structure. The size of this array is equal to the number
2411            of local rows, i.e 'm'.
2412 
2413    If the *_nnz parameter is given then the *_nz parameter is ignored
2414 
2415    The AIJ format (also called the Yale sparse matrix format or
2416    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2417    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2418 
2419    The parallel matrix is partitioned such that the first m0 rows belong to
2420    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2421    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2422 
2423    The DIAGONAL portion of the local submatrix of a processor can be defined
2424    as the submatrix which is obtained by extraction the part corresponding
2425    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2426    first row that belongs to the processor, and r2 is the last row belonging
2427    to the this processor. This is a square mxm matrix. The remaining portion
2428    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2429 
2430    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2431 
2432    Example usage:
2433 
2434    Consider the following 8x8 matrix with 34 non-zero values, that is
2435    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2436    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2437    as follows:
2438 
2439 .vb
2440             1  2  0  |  0  3  0  |  0  4
2441     Proc0   0  5  6  |  7  0  0  |  8  0
2442             9  0 10  | 11  0  0  | 12  0
2443     -------------------------------------
2444            13  0 14  | 15 16 17  |  0  0
2445     Proc1   0 18  0  | 19 20 21  |  0  0
2446             0  0  0  | 22 23  0  | 24  0
2447     -------------------------------------
2448     Proc2  25 26 27  |  0  0 28  | 29  0
2449            30  0  0  | 31 32 33  |  0 34
2450 .ve
2451 
2452    This can be represented as a collection of submatrices as:
2453 
2454 .vb
2455       A B C
2456       D E F
2457       G H I
2458 .ve
2459 
2460    Where the submatrices A,B,C are owned by proc0, D,E,F are
2461    owned by proc1, G,H,I are owned by proc2.
2462 
2463    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2464    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2465    The 'M','N' parameters are 8,8, and have the same values on all procs.
2466 
2467    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2468    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2469    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2470    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2471    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2472    matrix, ans [DF] as another SeqAIJ matrix.
2473 
2474    When d_nz, o_nz parameters are specified, d_nz storage elements are
2475    allocated for every row of the local diagonal submatrix, and o_nz
2476    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2477    One way to choose d_nz and o_nz is to use the max nonzerors per local
2478    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2479    In this case, the values of d_nz,o_nz are:
2480 .vb
2481      proc0 : dnz = 2, o_nz = 2
2482      proc1 : dnz = 3, o_nz = 2
2483      proc2 : dnz = 1, o_nz = 4
2484 .ve
2485    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2486    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2487    for proc3. i.e we are using 12+15+10=37 storage locations to store
2488    34 values.
2489 
2490    When d_nnz, o_nnz parameters are specified, the storage is specified
2491    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2492    In the above case the values for d_nnz,o_nnz are:
2493 .vb
2494      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2495      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2496      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2497 .ve
2498    Here the space allocated is sum of all the above values i.e 34, and
2499    hence pre-allocation is perfect.
2500 
2501    Level: intermediate
2502 
2503 .keywords: matrix, aij, compressed row, sparse, parallel
2504 
2505 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2506           MPIAIJ
2507 @*/
2508 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2509 {
2510   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2511 
2512   PetscFunctionBegin;
2513   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2514   if (f) {
2515     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2516   }
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatCreateMPIAIJ"
2522 /*@C
2523    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2524    (the default parallel PETSc format).  For good matrix assembly performance
2525    the user should preallocate the matrix storage by setting the parameters
2526    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2527    performance can be increased by more than a factor of 50.
2528 
2529    Collective on MPI_Comm
2530 
2531    Input Parameters:
2532 +  comm - MPI communicator
2533 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2534            This value should be the same as the local size used in creating the
2535            y vector for the matrix-vector product y = Ax.
2536 .  n - This value should be the same as the local size used in creating the
2537        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2538        calculated if N is given) For square matrices n is almost always m.
2539 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2540 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2541 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2542            (same value is used for all local rows)
2543 .  d_nnz - array containing the number of nonzeros in the various rows of the
2544            DIAGONAL portion of the local submatrix (possibly different for each row)
2545            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2546            The size of this array is equal to the number of local rows, i.e 'm'.
2547            You must leave room for the diagonal entry even if it is zero.
2548 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2549            submatrix (same value is used for all local rows).
2550 -  o_nnz - array containing the number of nonzeros in the various rows of the
2551            OFF-DIAGONAL portion of the local submatrix (possibly different for
2552            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2553            structure. The size of this array is equal to the number
2554            of local rows, i.e 'm'.
2555 
2556    Output Parameter:
2557 .  A - the matrix
2558 
2559    Notes:
2560    If the *_nnz parameter is given then the *_nz parameter is ignored
2561 
2562    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2563    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2564    storage requirements for this matrix.
2565 
2566    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2567    processor than it must be used on all processors that share the object for
2568    that argument.
2569 
2570    The user MUST specify either the local or global matrix dimensions
2571    (possibly both).
2572 
2573    The parallel matrix is partitioned such that the first m0 rows belong to
2574    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2575    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2576 
2577    The DIAGONAL portion of the local submatrix of a processor can be defined
2578    as the submatrix which is obtained by extraction the part corresponding
2579    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2580    first row that belongs to the processor, and r2 is the last row belonging
2581    to the this processor. This is a square mxm matrix. The remaining portion
2582    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2583 
2584    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2585 
2586    When calling this routine with a single process communicator, a matrix of
2587    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2588    type of communicator, use the construction mechanism:
2589      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2590 
2591    By default, this format uses inodes (identical nodes) when possible.
2592    We search for consecutive rows with the same nonzero structure, thereby
2593    reusing matrix information to achieve increased efficiency.
2594 
2595    Options Database Keys:
2596 +  -mat_no_inode  - Do not use inodes
2597 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2598 -  -mat_aij_oneindex - Internally use indexing starting at 1
2599         rather than 0.  Note that when calling MatSetValues(),
2600         the user still MUST index entries starting at 0!
2601 
2602 
2603    Example usage:
2604 
2605    Consider the following 8x8 matrix with 34 non-zero values, that is
2606    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
2607    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2608    as follows:
2609 
2610 .vb
2611             1  2  0  |  0  3  0  |  0  4
2612     Proc0   0  5  6  |  7  0  0  |  8  0
2613             9  0 10  | 11  0  0  | 12  0
2614     -------------------------------------
2615            13  0 14  | 15 16 17  |  0  0
2616     Proc1   0 18  0  | 19 20 21  |  0  0
2617             0  0  0  | 22 23  0  | 24  0
2618     -------------------------------------
2619     Proc2  25 26 27  |  0  0 28  | 29  0
2620            30  0  0  | 31 32 33  |  0 34
2621 .ve
2622 
2623    This can be represented as a collection of submatrices as:
2624 
2625 .vb
2626       A B C
2627       D E F
2628       G H I
2629 .ve
2630 
2631    Where the submatrices A,B,C are owned by proc0, D,E,F are
2632    owned by proc1, G,H,I are owned by proc2.
2633 
2634    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2635    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2636    The 'M','N' parameters are 8,8, and have the same values on all procs.
2637 
2638    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2639    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2640    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2641    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2642    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
2643    matrix, ans [DF] as another SeqAIJ matrix.
2644 
2645    When d_nz, o_nz parameters are specified, d_nz storage elements are
2646    allocated for every row of the local diagonal submatrix, and o_nz
2647    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2648    One way to choose d_nz and o_nz is to use the max nonzerors per local
2649    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2650    In this case, the values of d_nz,o_nz are:
2651 .vb
2652      proc0 : dnz = 2, o_nz = 2
2653      proc1 : dnz = 3, o_nz = 2
2654      proc2 : dnz = 1, o_nz = 4
2655 .ve
2656    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2657    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2658    for proc3. i.e we are using 12+15+10=37 storage locations to store
2659    34 values.
2660 
2661    When d_nnz, o_nnz parameters are specified, the storage is specified
2662    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2663    In the above case the values for d_nnz,o_nnz are:
2664 .vb
2665      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2666      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2667      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2668 .ve
2669    Here the space allocated is sum of all the above values i.e 34, and
2670    hence pre-allocation is perfect.
2671 
2672    Level: intermediate
2673 
2674 .keywords: matrix, aij, compressed row, sparse, parallel
2675 
2676 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2677           MPIAIJ
2678 @*/
2679 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2680 {
2681   PetscErrorCode ierr;
2682   PetscMPIInt    size;
2683 
2684   PetscFunctionBegin;
2685   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2686   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2687   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2688   if (size > 1) {
2689     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2690     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2691   } else {
2692     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2693     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2694   }
2695   PetscFunctionReturn(0);
2696 }
2697 
2698 #undef __FUNCT__
2699 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2700 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2701 {
2702   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2703 
2704   PetscFunctionBegin;
2705   *Ad     = a->A;
2706   *Ao     = a->B;
2707   *colmap = a->garray;
2708   PetscFunctionReturn(0);
2709 }
2710 
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2713 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2714 {
2715   PetscErrorCode ierr;
2716   PetscInt       i;
2717   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2718 
2719   PetscFunctionBegin;
2720   if (coloring->ctype == IS_COLORING_LOCAL) {
2721     ISColoringValue *allcolors,*colors;
2722     ISColoring      ocoloring;
2723 
2724     /* set coloring for diagonal portion */
2725     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2726 
2727     /* set coloring for off-diagonal portion */
2728     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2729     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2730     for (i=0; i<a->B->cmap.n; i++) {
2731       colors[i] = allcolors[a->garray[i]];
2732     }
2733     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2734     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
2735     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2736     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2737   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2738     ISColoringValue *colors;
2739     PetscInt        *larray;
2740     ISColoring      ocoloring;
2741 
2742     /* set coloring for diagonal portion */
2743     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2744     for (i=0; i<a->A->cmap.n; i++) {
2745       larray[i] = i + A->cmap.rstart;
2746     }
2747     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->cmap.n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2748     ierr = PetscMalloc((a->A->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2749     for (i=0; i<a->A->cmap.n; i++) {
2750       colors[i] = coloring->colors[larray[i]];
2751     }
2752     ierr = PetscFree(larray);CHKERRQ(ierr);
2753     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
2754     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2755     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2756 
2757     /* set coloring for off-diagonal portion */
2758     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2759     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->cmap.n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2760     ierr = PetscMalloc((a->B->cmap.n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2761     for (i=0; i<a->B->cmap.n; i++) {
2762       colors[i] = coloring->colors[larray[i]];
2763     }
2764     ierr = PetscFree(larray);CHKERRQ(ierr);
2765     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->cmap.n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
2766     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2767     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2768   } else {
2769     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2770   }
2771 
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 #if defined(PETSC_HAVE_ADIC)
2776 #undef __FUNCT__
2777 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2778 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2779 {
2780   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2781   PetscErrorCode ierr;
2782 
2783   PetscFunctionBegin;
2784   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2785   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2786   PetscFunctionReturn(0);
2787 }
2788 #endif
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2792 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2793 {
2794   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2795   PetscErrorCode ierr;
2796 
2797   PetscFunctionBegin;
2798   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2799   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatMerge"
2805 /*@C
2806       MatMerge - Creates a single large PETSc matrix by concatinating sequential
2807                  matrices from each processor
2808 
2809     Collective on MPI_Comm
2810 
2811    Input Parameters:
2812 +    comm - the communicators the parallel matrix will live on
2813 .    inmat - the input sequential matrices
2814 .    n - number of local columns (or PETSC_DECIDE)
2815 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2816 
2817    Output Parameter:
2818 .    outmat - the parallel matrix generated
2819 
2820     Level: advanced
2821 
2822    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2823 
2824 @*/
2825 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2826 {
2827   PetscErrorCode ierr;
2828   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2829   PetscInt       *indx;
2830   PetscScalar    *values;
2831 
2832   PetscFunctionBegin;
2833   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2834   if (scall == MAT_INITIAL_MATRIX){
2835     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2836     if (n == PETSC_DECIDE){
2837       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
2838     }
2839     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2840     rstart -= m;
2841 
2842     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2843     for (i=0;i<m;i++) {
2844       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2845       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2846       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2847     }
2848     /* This routine will ONLY return MPIAIJ type matrix */
2849     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2850     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2851     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2852     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2853     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2854 
2855   } else if (scall == MAT_REUSE_MATRIX){
2856     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2857   } else {
2858     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2859   }
2860 
2861   for (i=0;i<m;i++) {
2862     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2863     I    = i + rstart;
2864     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2865     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2866   }
2867   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2868   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2869   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2870 
2871   PetscFunctionReturn(0);
2872 }
2873 
2874 #undef __FUNCT__
2875 #define __FUNCT__ "MatFileSplit"
2876 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2877 {
2878   PetscErrorCode    ierr;
2879   PetscMPIInt       rank;
2880   PetscInt          m,N,i,rstart,nnz;
2881   size_t            len;
2882   const PetscInt    *indx;
2883   PetscViewer       out;
2884   char              *name;
2885   Mat               B;
2886   const PetscScalar *values;
2887 
2888   PetscFunctionBegin;
2889   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2890   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2891   /* Should this be the type of the diagonal block of A? */
2892   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2893   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2894   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2895   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2896   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2897   for (i=0;i<m;i++) {
2898     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2899     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2900     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2901   }
2902   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2903   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2904 
2905   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2906   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2907   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2908   sprintf(name,"%s.%d",outfile,rank);
2909   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
2910   ierr = PetscFree(name);
2911   ierr = MatView(B,out);CHKERRQ(ierr);
2912   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2913   ierr = MatDestroy(B);CHKERRQ(ierr);
2914   PetscFunctionReturn(0);
2915 }
2916 
2917 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2918 #undef __FUNCT__
2919 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2920 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2921 {
2922   PetscErrorCode       ierr;
2923   Mat_Merge_SeqsToMPI  *merge;
2924   PetscObjectContainer container;
2925 
2926   PetscFunctionBegin;
2927   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2928   if (container) {
2929     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2930     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2931     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2932     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2933     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2934     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2935     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2936     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2937     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
2938     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
2939     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
2940 
2941     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2942     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2943   }
2944   ierr = PetscFree(merge);CHKERRQ(ierr);
2945 
2946   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2947   PetscFunctionReturn(0);
2948 }
2949 
2950 #include "src/mat/utils/freespace.h"
2951 #include "petscbt.h"
2952 static PetscEvent logkey_seqstompinum = 0;
2953 #undef __FUNCT__
2954 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2955 /*@C
2956       MatMerge_SeqsToMPI - Creates a MPIAIJ matrix by adding sequential
2957                  matrices from each processor
2958 
2959     Collective on MPI_Comm
2960 
2961    Input Parameters:
2962 +    comm - the communicators the parallel matrix will live on
2963 .    seqmat - the input sequential matrices
2964 .    m - number of local rows (or PETSC_DECIDE)
2965 .    n - number of local columns (or PETSC_DECIDE)
2966 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2967 
2968    Output Parameter:
2969 .    mpimat - the parallel matrix generated
2970 
2971     Level: advanced
2972 
2973    Notes:
2974      The dimensions of the sequential matrix in each processor MUST be the same.
2975      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
2976      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2977 @*/
2978 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2979 {
2980   PetscErrorCode       ierr;
2981   MPI_Comm             comm=mpimat->comm;
2982   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2983   PetscMPIInt          size,rank,taga,*len_s;
2984   PetscInt             N=mpimat->cmap.N,i,j,*owners,*ai=a->i,*aj=a->j;
2985   PetscInt             proc,m;
2986   PetscInt             **buf_ri,**buf_rj;
2987   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2988   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2989   MPI_Request          *s_waits,*r_waits;
2990   MPI_Status           *status;
2991   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2992   Mat_Merge_SeqsToMPI  *merge;
2993   PetscObjectContainer container;
2994 
2995   PetscFunctionBegin;
2996   if (!logkey_seqstompinum) {
2997     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
2998   }
2999   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3000 
3001   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3002   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3003 
3004   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
3005   if (container) {
3006     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
3007   }
3008   bi     = merge->bi;
3009   bj     = merge->bj;
3010   buf_ri = merge->buf_ri;
3011   buf_rj = merge->buf_rj;
3012 
3013   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3014   owners = merge->rowmap.range;
3015   len_s  = merge->len_s;
3016 
3017   /* send and recv matrix values */
3018   /*-----------------------------*/
3019   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3020   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3021 
3022   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3023   for (proc=0,k=0; proc<size; proc++){
3024     if (!len_s[proc]) continue;
3025     i = owners[proc];
3026     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3027     k++;
3028   }
3029 
3030   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3031   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3032   ierr = PetscFree(status);CHKERRQ(ierr);
3033 
3034   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3035   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3036 
3037   /* insert mat values of mpimat */
3038   /*----------------------------*/
3039   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3040   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3041   nextrow = buf_ri_k + merge->nrecv;
3042   nextai  = nextrow + merge->nrecv;
3043 
3044   for (k=0; k<merge->nrecv; k++){
3045     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3046     nrows = *(buf_ri_k[k]);
3047     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3048     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3049   }
3050 
3051   /* set values of ba */
3052   m = merge->rowmap.n;
3053   for (i=0; i<m; i++) {
3054     arow = owners[rank] + i;
3055     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3056     bnzi = bi[i+1] - bi[i];
3057     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3058 
3059     /* add local non-zero vals of this proc's seqmat into ba */
3060     anzi = ai[arow+1] - ai[arow];
3061     aj   = a->j + ai[arow];
3062     aa   = a->a + ai[arow];
3063     nextaj = 0;
3064     for (j=0; nextaj<anzi; j++){
3065       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3066         ba_i[j] += aa[nextaj++];
3067       }
3068     }
3069 
3070     /* add received vals into ba */
3071     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3072       /* i-th row */
3073       if (i == *nextrow[k]) {
3074         anzi = *(nextai[k]+1) - *nextai[k];
3075         aj   = buf_rj[k] + *(nextai[k]);
3076         aa   = abuf_r[k] + *(nextai[k]);
3077         nextaj = 0;
3078         for (j=0; nextaj<anzi; j++){
3079           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3080             ba_i[j] += aa[nextaj++];
3081           }
3082         }
3083         nextrow[k]++; nextai[k]++;
3084       }
3085     }
3086     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3087   }
3088   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3089   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3090 
3091   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3092   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3093   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3094   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 static PetscEvent logkey_seqstompisym = 0;
3099 #undef __FUNCT__
3100 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3101 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3102 {
3103   PetscErrorCode       ierr;
3104   Mat                  B_mpi;
3105   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3106   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3107   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3108   PetscInt             M=seqmat->rmap.n,N=seqmat->cmap.n,i,*owners,*ai=a->i,*aj=a->j;
3109   PetscInt             len,proc,*dnz,*onz;
3110   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3111   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3112   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3113   MPI_Status           *status;
3114   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3115   PetscBT              lnkbt;
3116   Mat_Merge_SeqsToMPI  *merge;
3117   PetscObjectContainer container;
3118 
3119   PetscFunctionBegin;
3120   if (!logkey_seqstompisym) {
3121     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3122   }
3123   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3124 
3125   /* make sure it is a PETSc comm */
3126   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3127   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3128   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3129 
3130   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3131   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3132 
3133   /* determine row ownership */
3134   /*---------------------------------------------------------*/
3135   merge->rowmap.n = m;
3136   merge->rowmap.N = M;
3137   ierr = PetscMapInitialize(comm,&merge->rowmap);CHKERRQ(ierr);
3138   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3139   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3140 
3141   m      = merge->rowmap.n;
3142   M      = merge->rowmap.N;
3143   owners = merge->rowmap.range;
3144 
3145   /* determine the number of messages to send, their lengths */
3146   /*---------------------------------------------------------*/
3147   len_s  = merge->len_s;
3148 
3149   len = 0;  /* length of buf_si[] */
3150   merge->nsend = 0;
3151   for (proc=0; proc<size; proc++){
3152     len_si[proc] = 0;
3153     if (proc == rank){
3154       len_s[proc] = 0;
3155     } else {
3156       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3157       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3158     }
3159     if (len_s[proc]) {
3160       merge->nsend++;
3161       nrows = 0;
3162       for (i=owners[proc]; i<owners[proc+1]; i++){
3163         if (ai[i+1] > ai[i]) nrows++;
3164       }
3165       len_si[proc] = 2*(nrows+1);
3166       len += len_si[proc];
3167     }
3168   }
3169 
3170   /* determine the number and length of messages to receive for ij-structure */
3171   /*-------------------------------------------------------------------------*/
3172   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3173   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3174 
3175   /* post the Irecv of j-structure */
3176   /*-------------------------------*/
3177   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&tagj);CHKERRQ(ierr);
3178   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3179 
3180   /* post the Isend of j-structure */
3181   /*--------------------------------*/
3182   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3183   sj_waits = si_waits + merge->nsend;
3184 
3185   for (proc=0, k=0; proc<size; proc++){
3186     if (!len_s[proc]) continue;
3187     i = owners[proc];
3188     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3189     k++;
3190   }
3191 
3192   /* receives and sends of j-structure are complete */
3193   /*------------------------------------------------*/
3194   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3195   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3196 
3197   /* send and recv i-structure */
3198   /*---------------------------*/
3199   ierr = PetscObjectGetNewTag((PetscObject)merge,&tagi);CHKERRQ(ierr);
3200   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3201 
3202   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3203   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3204   for (proc=0,k=0; proc<size; proc++){
3205     if (!len_s[proc]) continue;
3206     /* form outgoing message for i-structure:
3207          buf_si[0]:                 nrows to be sent
3208                [1:nrows]:           row index (global)
3209                [nrows+1:2*nrows+1]: i-structure index
3210     */
3211     /*-------------------------------------------*/
3212     nrows = len_si[proc]/2 - 1;
3213     buf_si_i    = buf_si + nrows+1;
3214     buf_si[0]   = nrows;
3215     buf_si_i[0] = 0;
3216     nrows = 0;
3217     for (i=owners[proc]; i<owners[proc+1]; i++){
3218       anzi = ai[i+1] - ai[i];
3219       if (anzi) {
3220         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3221         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3222         nrows++;
3223       }
3224     }
3225     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3226     k++;
3227     buf_si += len_si[proc];
3228   }
3229 
3230   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3231   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3232 
3233   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3234   for (i=0; i<merge->nrecv; i++){
3235     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3236   }
3237 
3238   ierr = PetscFree(len_si);CHKERRQ(ierr);
3239   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3240   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3241   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3242   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3243   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3244   ierr = PetscFree(status);CHKERRQ(ierr);
3245 
3246   /* compute a local seq matrix in each processor */
3247   /*----------------------------------------------*/
3248   /* allocate bi array and free space for accumulating nonzero column info */
3249   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3250   bi[0] = 0;
3251 
3252   /* create and initialize a linked list */
3253   nlnk = N+1;
3254   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3255 
3256   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3257   len = 0;
3258   len  = ai[owners[rank+1]] - ai[owners[rank]];
3259   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3260   current_space = free_space;
3261 
3262   /* determine symbolic info for each local row */
3263   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3264   nextrow = buf_ri_k + merge->nrecv;
3265   nextai  = nextrow + merge->nrecv;
3266   for (k=0; k<merge->nrecv; k++){
3267     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3268     nrows = *buf_ri_k[k];
3269     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3270     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* poins to the next i-structure of k-th recved i-structure  */
3271   }
3272 
3273   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3274   len = 0;
3275   for (i=0;i<m;i++) {
3276     bnzi   = 0;
3277     /* add local non-zero cols of this proc's seqmat into lnk */
3278     arow   = owners[rank] + i;
3279     anzi   = ai[arow+1] - ai[arow];
3280     aj     = a->j + ai[arow];
3281     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3282     bnzi += nlnk;
3283     /* add received col data into lnk */
3284     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3285       if (i == *nextrow[k]) { /* i-th row */
3286         anzi = *(nextai[k]+1) - *nextai[k];
3287         aj   = buf_rj[k] + *nextai[k];
3288         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3289         bnzi += nlnk;
3290         nextrow[k]++; nextai[k]++;
3291       }
3292     }
3293     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3294 
3295     /* if free space is not available, make more free space */
3296     if (current_space->local_remaining<bnzi) {
3297       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3298       nspacedouble++;
3299     }
3300     /* copy data into free space, then initialize lnk */
3301     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3302     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3303 
3304     current_space->array           += bnzi;
3305     current_space->local_used      += bnzi;
3306     current_space->local_remaining -= bnzi;
3307 
3308     bi[i+1] = bi[i] + bnzi;
3309   }
3310 
3311   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3312 
3313   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3314   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3315   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3316 
3317   /* create symbolic parallel matrix B_mpi */
3318   /*---------------------------------------*/
3319   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3320   if (n==PETSC_DECIDE) {
3321     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3322   } else {
3323     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3324   }
3325   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3326   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3327   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3328 
3329   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3330   B_mpi->assembled     = PETSC_FALSE;
3331   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3332   merge->bi            = bi;
3333   merge->bj            = bj;
3334   merge->buf_ri        = buf_ri;
3335   merge->buf_rj        = buf_rj;
3336   merge->coi           = PETSC_NULL;
3337   merge->coj           = PETSC_NULL;
3338   merge->owners_co     = PETSC_NULL;
3339 
3340   /* attach the supporting struct to B_mpi for reuse */
3341   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3342   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3343   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3344   *mpimat = B_mpi;
3345 
3346   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3347   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3348   PetscFunctionReturn(0);
3349 }
3350 
3351 static PetscEvent logkey_seqstompi = 0;
3352 #undef __FUNCT__
3353 #define __FUNCT__ "MatMerge_SeqsToMPI"
3354 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3355 {
3356   PetscErrorCode   ierr;
3357 
3358   PetscFunctionBegin;
3359   if (!logkey_seqstompi) {
3360     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3361   }
3362   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3363   if (scall == MAT_INITIAL_MATRIX){
3364     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3365   }
3366   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3367   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3368   PetscFunctionReturn(0);
3369 }
3370 static PetscEvent logkey_getlocalmat = 0;
3371 #undef __FUNCT__
3372 #define __FUNCT__ "MatGetLocalMat"
3373 /*@C
3374      MatGetLocalMat - Creates a SeqAIJ matrix by taking all its local rows
3375 
3376     Not Collective
3377 
3378    Input Parameters:
3379 +    A - the matrix
3380 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3381 
3382    Output Parameter:
3383 .    A_loc - the local sequential matrix generated
3384 
3385     Level: developer
3386 
3387 @*/
3388 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3389 {
3390   PetscErrorCode  ierr;
3391   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3392   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3393   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3394   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3395   PetscInt        am=A->rmap.n,i,j,k,cstart=A->cmap.rstart;
3396   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3397 
3398   PetscFunctionBegin;
3399   if (!logkey_getlocalmat) {
3400     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3401   }
3402   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3403   if (scall == MAT_INITIAL_MATRIX){
3404     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3405     ci[0] = 0;
3406     for (i=0; i<am; i++){
3407       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3408     }
3409     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3410     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3411     k = 0;
3412     for (i=0; i<am; i++) {
3413       ncols_o = bi[i+1] - bi[i];
3414       ncols_d = ai[i+1] - ai[i];
3415       /* off-diagonal portion of A */
3416       for (jo=0; jo<ncols_o; jo++) {
3417         col = cmap[*bj];
3418         if (col >= cstart) break;
3419         cj[k]   = col; bj++;
3420         ca[k++] = *ba++;
3421       }
3422       /* diagonal portion of A */
3423       for (j=0; j<ncols_d; j++) {
3424         cj[k]   = cstart + *aj++;
3425         ca[k++] = *aa++;
3426       }
3427       /* off-diagonal portion of A */
3428       for (j=jo; j<ncols_o; j++) {
3429         cj[k]   = cmap[*bj++];
3430         ca[k++] = *ba++;
3431       }
3432     }
3433     /* put together the new matrix */
3434     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap.N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3435     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3436     /* Since these are PETSc arrays, change flags to free them as necessary. */
3437     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3438     mat->freedata = PETSC_TRUE;
3439     mat->nonew    = 0;
3440   } else if (scall == MAT_REUSE_MATRIX){
3441     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3442     ci = mat->i; cj = mat->j; ca = mat->a;
3443     for (i=0; i<am; i++) {
3444       /* off-diagonal portion of A */
3445       ncols_o = bi[i+1] - bi[i];
3446       for (jo=0; jo<ncols_o; jo++) {
3447         col = cmap[*bj];
3448         if (col >= cstart) break;
3449         *ca++ = *ba++; bj++;
3450       }
3451       /* diagonal portion of A */
3452       ncols_d = ai[i+1] - ai[i];
3453       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3454       /* off-diagonal portion of A */
3455       for (j=jo; j<ncols_o; j++) {
3456         *ca++ = *ba++; bj++;
3457       }
3458     }
3459   } else {
3460     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3461   }
3462 
3463   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3464   PetscFunctionReturn(0);
3465 }
3466 
3467 static PetscEvent logkey_getlocalmatcondensed = 0;
3468 #undef __FUNCT__
3469 #define __FUNCT__ "MatGetLocalMatCondensed"
3470 /*@C
3471      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
3472 
3473     Not Collective
3474 
3475    Input Parameters:
3476 +    A - the matrix
3477 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3478 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3479 
3480    Output Parameter:
3481 .    A_loc - the local sequential matrix generated
3482 
3483     Level: developer
3484 
3485 @*/
3486 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3487 {
3488   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3489   PetscErrorCode    ierr;
3490   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3491   IS                isrowa,iscola;
3492   Mat               *aloc;
3493 
3494   PetscFunctionBegin;
3495   if (!logkey_getlocalmatcondensed) {
3496     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3497   }
3498   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3499   if (!row){
3500     start = A->rmap.rstart; end = A->rmap.rend;
3501     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3502   } else {
3503     isrowa = *row;
3504   }
3505   if (!col){
3506     start = A->cmap.rstart;
3507     cmap  = a->garray;
3508     nzA   = a->A->cmap.n;
3509     nzB   = a->B->cmap.n;
3510     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3511     ncols = 0;
3512     for (i=0; i<nzB; i++) {
3513       if (cmap[i] < start) idx[ncols++] = cmap[i];
3514       else break;
3515     }
3516     imark = i;
3517     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3518     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3519     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3520     ierr = PetscFree(idx);CHKERRQ(ierr);
3521   } else {
3522     iscola = *col;
3523   }
3524   if (scall != MAT_INITIAL_MATRIX){
3525     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3526     aloc[0] = *A_loc;
3527   }
3528   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3529   *A_loc = aloc[0];
3530   ierr = PetscFree(aloc);CHKERRQ(ierr);
3531   if (!row){
3532     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3533   }
3534   if (!col){
3535     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3536   }
3537   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3538   PetscFunctionReturn(0);
3539 }
3540 
3541 static PetscEvent logkey_GetBrowsOfAcols = 0;
3542 #undef __FUNCT__
3543 #define __FUNCT__ "MatGetBrowsOfAcols"
3544 /*@C
3545     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
3546 
3547     Collective on Mat
3548 
3549    Input Parameters:
3550 +    A,B - the matrices in mpiaij format
3551 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3552 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3553 
3554    Output Parameter:
3555 +    rowb, colb - index sets of rows and columns of B to extract
3556 .    brstart - row index of B_seq from which next B->rmap.n rows are taken from B's local rows
3557 -    B_seq - the sequential matrix generated
3558 
3559     Level: developer
3560 
3561 @*/
3562 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3563 {
3564   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3565   PetscErrorCode    ierr;
3566   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3567   IS                isrowb,iscolb;
3568   Mat               *bseq;
3569 
3570   PetscFunctionBegin;
3571   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3572     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3573   }
3574   if (!logkey_GetBrowsOfAcols) {
3575     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3576   }
3577   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3578 
3579   if (scall == MAT_INITIAL_MATRIX){
3580     start = A->cmap.rstart;
3581     cmap  = a->garray;
3582     nzA   = a->A->cmap.n;
3583     nzB   = a->B->cmap.n;
3584     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3585     ncols = 0;
3586     for (i=0; i<nzB; i++) {  /* row < local row index */
3587       if (cmap[i] < start) idx[ncols++] = cmap[i];
3588       else break;
3589     }
3590     imark = i;
3591     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3592     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3593     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3594     ierr = PetscFree(idx);CHKERRQ(ierr);
3595     *brstart = imark;
3596     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap.N,0,1,&iscolb);CHKERRQ(ierr);
3597   } else {
3598     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3599     isrowb = *rowb; iscolb = *colb;
3600     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3601     bseq[0] = *B_seq;
3602   }
3603   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3604   *B_seq = bseq[0];
3605   ierr = PetscFree(bseq);CHKERRQ(ierr);
3606   if (!rowb){
3607     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3608   } else {
3609     *rowb = isrowb;
3610   }
3611   if (!colb){
3612     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3613   } else {
3614     *colb = iscolb;
3615   }
3616   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3617   PetscFunctionReturn(0);
3618 }
3619 
3620 static PetscEvent logkey_GetBrowsOfAocols = 0;
3621 #undef __FUNCT__
3622 #define __FUNCT__ "MatGetBrowsOfAoCols"
3623 /*@C
3624     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
3625     of the OFF-DIAGONAL portion of local A
3626 
3627     Collective on Mat
3628 
3629    Input Parameters:
3630 +    A,B - the matrices in mpiaij format
3631 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3632 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3633 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3634 
3635    Output Parameter:
3636 +    B_oth - the sequential matrix generated
3637 
3638     Level: developer
3639 
3640 @*/
3641 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3642 {
3643   VecScatter_MPI_General *gen_to,*gen_from;
3644   PetscErrorCode         ierr;
3645   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
3646   Mat_SeqAIJ             *b_oth;
3647   VecScatter             ctx=a->Mvctx;
3648   MPI_Comm               comm=ctx->comm;
3649   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3650   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap.n,row,*b_othi,*b_othj;
3651   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3652   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3653   MPI_Request            *rwaits,*swaits;
3654   MPI_Status             *sstatus,rstatus;
3655   PetscInt               *cols;
3656   PetscScalar            *vals;
3657   PetscMPIInt            j;
3658 
3659   PetscFunctionBegin;
3660   if (A->cmap.rstart != B->rmap.rstart || A->cmap.rend != B->rmap.rend){
3661     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap.rstart,A->cmap.rend,B->rmap.rstart,B->rmap.rend);
3662   }
3663   if (!logkey_GetBrowsOfAocols) {
3664     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3665   }
3666   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3667   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3668 
3669   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3670   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3671   rvalues  = gen_from->values; /* holds the length of sending row */
3672   svalues  = gen_to->values;   /* holds the length of receiving row */
3673   nrecvs   = gen_from->n;
3674   nsends   = gen_to->n;
3675   rwaits   = gen_from->requests;
3676   swaits   = gen_to->requests;
3677   srow     = gen_to->indices;   /* local row index to be sent */
3678   rstarts  = gen_from->starts;
3679   sstarts  = gen_to->starts;
3680   rprocs   = gen_from->procs;
3681   sprocs   = gen_to->procs;
3682   sstatus  = gen_to->sstatus;
3683 
3684   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3685   if (scall == MAT_INITIAL_MATRIX){
3686     /* i-array */
3687     /*---------*/
3688     /*  post receives */
3689     for (i=0; i<nrecvs; i++){
3690       rowlen = (PetscInt*)rvalues + rstarts[i];
3691       nrows = rstarts[i+1]-rstarts[i];
3692       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3693     }
3694 
3695     /* pack the outgoing message */
3696     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3697     rstartsj = sstartsj + nsends +1;
3698     sstartsj[0] = 0;  rstartsj[0] = 0;
3699     len = 0; /* total length of j or a array to be sent */
3700     k = 0;
3701     for (i=0; i<nsends; i++){
3702       rowlen = (PetscInt*)svalues + sstarts[i];
3703       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3704       for (j=0; j<nrows; j++) {
3705         row = srow[k] + B->rmap.range[rank]; /* global row idx */
3706         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3707         len += rowlen[j];
3708         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3709         k++;
3710       }
3711       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3712        sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3713     }
3714     /* recvs and sends of i-array are completed */
3715     i = nrecvs;
3716     while (i--) {
3717       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3718     }
3719     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3720     /* allocate buffers for sending j and a arrays */
3721     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3722     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3723 
3724     /* create i-array of B_oth */
3725     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3726     b_othi[0] = 0;
3727     len = 0; /* total length of j or a array to be received */
3728     k = 0;
3729     for (i=0; i<nrecvs; i++){
3730       rowlen = (PetscInt*)rvalues + rstarts[i];
3731       nrows = rstarts[i+1]-rstarts[i];
3732       for (j=0; j<nrows; j++) {
3733         b_othi[k+1] = b_othi[k] + rowlen[j];
3734         len += rowlen[j]; k++;
3735       }
3736       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3737     }
3738 
3739     /* allocate space for j and a arrrays of B_oth */
3740     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3741     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3742 
3743     /* j-array */
3744     /*---------*/
3745     /*  post receives of j-array */
3746     for (i=0; i<nrecvs; i++){
3747       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3748       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3749     }
3750     k = 0;
3751     for (i=0; i<nsends; i++){
3752       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3753       bufJ = bufj+sstartsj[i];
3754       for (j=0; j<nrows; j++) {
3755         row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3756         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3757         for (l=0; l<ncols; l++){
3758           *bufJ++ = cols[l];
3759         }
3760         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3761       }
3762       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3763     }
3764 
3765     /* recvs and sends of j-array are completed */
3766     i = nrecvs;
3767     while (i--) {
3768       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3769     }
3770     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3771   } else if (scall == MAT_REUSE_MATRIX){
3772     sstartsj = *startsj;
3773     rstartsj = sstartsj + nsends +1;
3774     bufa     = *bufa_ptr;
3775     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3776     b_otha   = b_oth->a;
3777   } else {
3778     SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
3779   }
3780 
3781   /* a-array */
3782   /*---------*/
3783   /*  post receives of a-array */
3784   for (i=0; i<nrecvs; i++){
3785     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3786     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3787   }
3788   k = 0;
3789   for (i=0; i<nsends; i++){
3790     nrows = sstarts[i+1]-sstarts[i];
3791     bufA = bufa+sstartsj[i];
3792     for (j=0; j<nrows; j++) {
3793       row  = srow[k++] + B->rmap.range[rank]; /* global row idx */
3794       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3795       for (l=0; l<ncols; l++){
3796         *bufA++ = vals[l];
3797       }
3798       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3799 
3800     }
3801     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3802   }
3803   /* recvs and sends of a-array are completed */
3804   i = nrecvs;
3805   while (i--) {
3806     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3807   }
3808    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3809 
3810   if (scall == MAT_INITIAL_MATRIX){
3811     /* put together the new matrix */
3812     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap.N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3813 
3814     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3815     /* Since these are PETSc arrays, change flags to free them as necessary. */
3816     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3817     b_oth->freedata = PETSC_TRUE;
3818     b_oth->nonew    = 0;
3819 
3820     ierr = PetscFree(bufj);CHKERRQ(ierr);
3821     if (!startsj || !bufa_ptr){
3822       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3823       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3824     } else {
3825       *startsj  = sstartsj;
3826       *bufa_ptr = bufa;
3827     }
3828   }
3829   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3830 
3831   PetscFunctionReturn(0);
3832 }
3833 
3834 #undef __FUNCT__
3835 #define __FUNCT__ "MatGetCommunicationStructs"
3836 /*@C
3837   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
3838 
3839   Not Collective
3840 
3841   Input Parameters:
3842 . A - The matrix in mpiaij format
3843 
3844   Output Parameter:
3845 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
3846 . colmap - A map from global column index to local index into lvec
3847 - multScatter - A scatter from the argument of a matrix-vector product to lvec
3848 
3849   Level: developer
3850 
3851 @*/
3852 #if defined (PETSC_USE_CTABLE)
3853 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
3854 #else
3855 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
3856 #endif
3857 {
3858   Mat_MPIAIJ *a;
3859 
3860   PetscFunctionBegin;
3861   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
3862   PetscValidPointer(lvec, 2)
3863   PetscValidPointer(colmap, 3)
3864   PetscValidPointer(multScatter, 4)
3865   a = (Mat_MPIAIJ *) A->data;
3866   if (lvec) *lvec = a->lvec;
3867   if (colmap) *colmap = a->colmap;
3868   if (multScatter) *multScatter = a->Mvctx;
3869   PetscFunctionReturn(0);
3870 }
3871 
3872 /*MC
3873    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3874 
3875    Options Database Keys:
3876 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3877 
3878   Level: beginner
3879 
3880 .seealso: MatCreateMPIAIJ
3881 M*/
3882 
3883 EXTERN_C_BEGIN
3884 #undef __FUNCT__
3885 #define __FUNCT__ "MatCreate_MPIAIJ"
3886 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3887 {
3888   Mat_MPIAIJ     *b;
3889   PetscErrorCode ierr;
3890   PetscMPIInt    size;
3891 
3892   PetscFunctionBegin;
3893   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3894 
3895   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3896   B->data         = (void*)b;
3897   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3898   B->factor       = 0;
3899   B->rmap.bs      = 1;
3900   B->assembled    = PETSC_FALSE;
3901   B->mapping      = 0;
3902 
3903   B->insertmode      = NOT_SET_VALUES;
3904   b->size            = size;
3905   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3906 
3907   /* build cache for off array entries formed */
3908   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3909   b->donotstash  = PETSC_FALSE;
3910   b->colmap      = 0;
3911   b->garray      = 0;
3912   b->roworiented = PETSC_TRUE;
3913 
3914   /* stuff used for matrix vector multiply */
3915   b->lvec      = PETSC_NULL;
3916   b->Mvctx     = PETSC_NULL;
3917 
3918   /* stuff for MatGetRow() */
3919   b->rowindices   = 0;
3920   b->rowvalues    = 0;
3921   b->getrowactive = PETSC_FALSE;
3922 
3923 
3924   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3925                                      "MatStoreValues_MPIAIJ",
3926                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3927   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3928                                      "MatRetrieveValues_MPIAIJ",
3929                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3930   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3931 				     "MatGetDiagonalBlock_MPIAIJ",
3932                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3933   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3934 				     "MatIsTranspose_MPIAIJ",
3935 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3936   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3937 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3938 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3939   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3940 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3941 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3942   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3943 				     "MatDiagonalScaleLocal_MPIAIJ",
3944 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3945   PetscFunctionReturn(0);
3946 }
3947 EXTERN_C_END
3948 
3949 /*
3950     Special version for direct calls from Fortran
3951 */
3952 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3953 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
3954 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3955 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
3956 #endif
3957 
3958 /* Change these macros so can be used in void function */
3959 #undef CHKERRQ
3960 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
3961 #undef SETERRQ2
3962 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
3963 #undef SETERRQ
3964 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
3965 
3966 EXTERN_C_BEGIN
3967 #undef __FUNCT__
3968 #define __FUNCT__ "matsetvaluesmpiaij_"
3969 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv)
3970 {
3971   Mat            mat = *mmat;
3972   PetscInt       m = *mm, n = *mn;
3973   InsertMode     addv = *maddv;
3974   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3975   PetscScalar    value;
3976   PetscErrorCode ierr;
3977 
3978   MatPreallocated(mat);
3979   if (mat->insertmode == NOT_SET_VALUES) {
3980     mat->insertmode = addv;
3981   }
3982 #if defined(PETSC_USE_DEBUG)
3983   else if (mat->insertmode != addv) {
3984     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3985   }
3986 #endif
3987   {
3988   PetscInt       i,j,rstart = mat->rmap.rstart,rend = mat->rmap.rend;
3989   PetscInt       cstart = mat->cmap.rstart,cend = mat->cmap.rend,row,col;
3990   PetscTruth     roworiented = aij->roworiented;
3991 
3992   /* Some Variables required in the macro */
3993   Mat            A = aij->A;
3994   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
3995   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
3996   PetscScalar    *aa = a->a;
3997   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
3998   Mat            B = aij->B;
3999   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4000   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap.n,am = aij->A->rmap.n;
4001   PetscScalar    *ba = b->a;
4002 
4003   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4004   PetscInt       nonew = a->nonew;
4005   PetscScalar    *ap1,*ap2;
4006 
4007   PetscFunctionBegin;
4008   for (i=0; i<m; i++) {
4009     if (im[i] < 0) continue;
4010 #if defined(PETSC_USE_DEBUG)
4011     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
4012 #endif
4013     if (im[i] >= rstart && im[i] < rend) {
4014       row      = im[i] - rstart;
4015       lastcol1 = -1;
4016       rp1      = aj + ai[row];
4017       ap1      = aa + ai[row];
4018       rmax1    = aimax[row];
4019       nrow1    = ailen[row];
4020       low1     = 0;
4021       high1    = nrow1;
4022       lastcol2 = -1;
4023       rp2      = bj + bi[row];
4024       ap2      = ba + bi[row];
4025       rmax2    = bimax[row];
4026       nrow2    = bilen[row];
4027       low2     = 0;
4028       high2    = nrow2;
4029 
4030       for (j=0; j<n; j++) {
4031         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4032         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4033         if (in[j] >= cstart && in[j] < cend){
4034           col = in[j] - cstart;
4035           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4036         } else if (in[j] < 0) continue;
4037 #if defined(PETSC_USE_DEBUG)
4038         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
4039 #endif
4040         else {
4041           if (mat->was_assembled) {
4042             if (!aij->colmap) {
4043               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4044             }
4045 #if defined (PETSC_USE_CTABLE)
4046             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4047 	    col--;
4048 #else
4049             col = aij->colmap[in[j]] - 1;
4050 #endif
4051             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4052               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4053               col =  in[j];
4054               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4055               B = aij->B;
4056               b = (Mat_SeqAIJ*)B->data;
4057               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4058               rp2      = bj + bi[row];
4059               ap2      = ba + bi[row];
4060               rmax2    = bimax[row];
4061               nrow2    = bilen[row];
4062               low2     = 0;
4063               high2    = nrow2;
4064               bm       = aij->B->rmap.n;
4065               ba = b->a;
4066             }
4067           } else col = in[j];
4068           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4069         }
4070       }
4071     } else {
4072       if (!aij->donotstash) {
4073         if (roworiented) {
4074           if (ignorezeroentries && v[i*n] == 0.0) continue;
4075           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4076         } else {
4077           if (ignorezeroentries && v[i] == 0.0) continue;
4078           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4079         }
4080       }
4081     }
4082   }}
4083   PetscFunctionReturnVoid();
4084 }
4085 EXTERN_C_END
4086