#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"   /*I "petscmat.h" I*/
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it, it is not scalable (each processor
has an order N integer array) but is fast to access.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
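
/*
   Illustrative sketch (not compiled in): how the map built above is meant to
   be consulted.  Entries are stored shifted by +1 so that a result of 0
   (hence -1 after shifting back) means "global column not present in B".
*/
#if 0
static PetscErrorCode ExampleColmapLookup(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr  = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  *lcol = *lcol - 1;               /* -1 signals "not found" */
#else
  *lcol = aij->colmap[gcol] - 1;   /* same convention as the table branch */
#endif
  PetscFunctionReturn(0);
}
#endif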

#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
      if (nonew == 1) goto a_noinsert; \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}


#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (col <= lastcol2) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
      for (_i=low2; _i<high2; _i++) { \
        if (rp2[_i] > col) break; \
        if (rp2[_i] == col) { \
          if (addv == ADD_VALUES) ap2[_i] += value;   \
          else                    ap2[_i] = value; \
          goto b_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
      if (nonew == 1) goto b_noinsert; \
      if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
      N = nrow2++ - 1; b->nz++; high2++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp2[ii+1] = rp2[ii]; \
        ap2[ii+1] = ap2[ii]; \
      } \
      rp2[_i] = col;  \
      ap2[_i] = value;  \
      b_noinsert: ; \
      bilen[row] = nrow2; \
}
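
/*
   The two macros above share one pattern: narrow the search window with a
   short binary search (the lastcol tests exploit runs of increasing column
   indices), scan linearly for the column, and either update in place or
   shift the tail of the row up one slot to insert.  A plain-function sketch
   of that pattern for a single sorted row, without the reallocation and
   error handling (illustrative only):
*/
#if 0
static void ExampleInsertSorted(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,
                                PetscInt col,PetscScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i,ii;

  while (high-low > 5) {                     /* bounded binary search */
    t = (low+high)/2;
    if (cols[t] > col) high = t;
    else               low  = t;
  }
  for (i=low; i<high; i++) {                 /* short linear scan */
    if (cols[i] > col) break;
    if (cols[i] == col) {                    /* already present: update */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i]  = value;
      return;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {            /* shift later entries up */
    cols[ii+1] = cols[ii];
    vals[ii+1] = vals[ii];
  }
  cols[i] = col; vals[i] = value; (*nrow)++; /* insert the new nonzero */
}
#endif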

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
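
/*
   Note on the layout assumed above: the input array v must hold the complete
   row in global column order, which in the local storage splits into three
   contiguous pieces: the off-diagonal entries left of the diagonal block
   (the first l entries of B's row), then the entire diagonal-block row of A,
   then the remaining off-diagonal entries of B.  The three PetscMemcpy()
   calls copy exactly those pieces.
*/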

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the macros */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B  = aij->B;
              b  = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba       = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
              bm       = aij->B->m;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
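
/*
   Caller-side sketch (illustrative of standard PETSc usage): values destined
   for rows owned by other processes are routed through the stash above and
   delivered during assembly, so a typical caller simply sets values with
   global indices and then assembles:
*/
#if 0
{
  Mat            mat;               /* created and preallocated elsewhere */
  PetscErrorCode ierr;
  PetscInt       i = 0,j = 0;       /* any global indices, owned or not */
  PetscScalar    v = 1.0;
  ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
}
#endif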

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
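
/*
   The bitwise-OR reduction above detects mixed modes: INSERT_VALUES and
   ADD_VALUES are distinct nonzero bit patterns in the InsertMode enumeration,
   so the OR over all ranks equals (ADD_VALUES|INSERT_VALUES) exactly when
   some ranks inserted while others added.
*/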

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* reacquire b because aij->B may have been changed by MatSetValues() or DisAssemble() */
  b    = (Mat_SeqAIJ *)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_USE_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_USE_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_USE_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory with many mallocs and slow things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
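
/*
   Caller-side sketch (illustrative): rows may be listed on any process, not
   just their owner; the scatter above routes each row to its owner first.
*/
#if 0
{
  Mat            A;                  /* an assembled MPIAIJ matrix */
  PetscErrorCode ierr;
  PetscInt       rows[2] = {0,5};    /* global row numbers, owned or not */
  ierr = MatZeroRows(A,2,rows,1.0);CHKERRQ(ierr); /* zero rows, set 1.0 on the diagonal */
}
#endif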

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
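
/*
   Note the ordering above: the scatter that gathers the ghost values of xx
   into a->lvec is only begun, the purely local product with the diagonal
   block A proceeds while those messages are in flight, and only then is the
   scatter completed and the off-diagonal contribution B*lvec added in.  This
   overlaps communication with computation.
*/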

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added into yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we still need to call this (no-op) End() */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->m;
    range = mat->rmap.range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt   inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
      CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
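
/*
   The loops above implement a block Jacobi outer iteration with SOR applied
   inside each diagonal block: for every global iteration the current xx is
   scattered to obtain ghost values, the right-hand side is corrected to
   bb1 = bb - B*x_ghost (via the VecScale by -1 and the multadd), and the
   requested local sweep is run on the diagonal block A with bb1.
*/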

#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  MPI_Comm       comm,pcomm;
  PetscInt       first,local_size,nrows,*rows;
  int            ntids;
  IS             crowp,growp,irowp,lrowp,lcolp,icolp;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* make a collective version of 'rowp' */
  ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
  if (pcomm==comm) {
    crowp = rowp;
  } else {
    ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
    ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
  }
  /* collect the global row permutation and invert it */
  ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
  ierr = ISSetPermutation(growp);CHKERRQ(ierr);
  if (pcomm!=comm) {
    ierr = ISDestroy(crowp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
  ierr = ISDestroy(growp);CHKERRQ(ierr);
  /* get the local target indices */
  ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
  ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
  ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
  ierr = ISDestroy(irowp);CHKERRQ(ierr);
  /* the column permutation is so much easier;
     make a local version of 'colp' and invert it */
  ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(pcomm,&ntids);CHKERRQ(ierr);
  if (ntids==1) {
    lcolp = colp;
  } else {
    ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
    ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
  }
  ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
  ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
  if (ntids>1) {
    ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
    ierr = ISDestroy(lcolp);CHKERRQ(ierr);
  }
  /* now we just get the submatrix */
  ierr = MatGetSubMatrix(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
  /* clean up */
  ierr = ISDestroy(lrowp);CHKERRQ(ierr);
  ierr = ISDestroy(icolp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->n;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatNorm_MPIAIJ"
1358 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1359 {
1360   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1361   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1362   PetscErrorCode ierr;
1363   PetscInt       i,j,cstart = aij->cstart;
1364   PetscReal      sum = 0.0;
1365   PetscScalar    *v;
1366 
1367   PetscFunctionBegin;
1368   if (aij->size == 1) {
1369     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1370   } else {
1371     if (type == NORM_FROBENIUS) {
1372       v = amat->a;
1373       for (i=0; i<amat->nz; i++) {
1374 #if defined(PETSC_USE_COMPLEX)
1375         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1376 #else
1377         sum += (*v)*(*v); v++;
1378 #endif
1379       }
1380       v = bmat->a;
1381       for (i=0; i<bmat->nz; i++) {
1382 #if defined(PETSC_USE_COMPLEX)
1383         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1384 #else
1385         sum += (*v)*(*v); v++;
1386 #endif
1387       }
1388       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1389       *norm = sqrt(*norm);
1390     } else if (type == NORM_1) { /* max column norm */
1391       PetscReal *tmp,*tmp2;
1392       PetscInt    *jj,*garray = aij->garray;
1393       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1394       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1395       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
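      /* tmp accumulates the absolute column sums over all global columns
         (note: O(N) storage per process); they are summed across processes
         below and the maximum taken */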
1396       *norm = 0.0;
1397       v = amat->a; jj = amat->j;
1398       for (j=0; j<amat->nz; j++) {
1399         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1400       }
1401       v = bmat->a; jj = bmat->j;
1402       for (j=0; j<bmat->nz; j++) {
1403         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1404       }
1405       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1406       for (j=0; j<mat->N; j++) {
1407         if (tmp2[j] > *norm) *norm = tmp2[j];
1408       }
1409       ierr = PetscFree(tmp);CHKERRQ(ierr);
1410       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1411     } else if (type == NORM_INFINITY) { /* max row norm */
1412       PetscReal ntemp = 0.0;
1413       for (j=0; j<aij->A->m; j++) {
1414         v = amat->a + amat->i[j];
1415         sum = 0.0;
1416         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1417           sum += PetscAbsScalar(*v); v++;
1418         }
1419         v = bmat->a + bmat->i[j];
1420         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1421           sum += PetscAbsScalar(*v); v++;
1422         }
1423         if (sum > ntemp) ntemp = sum;
1424       }
1425       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1426     } else {
1427       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1428     }
1429   }
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatTranspose_MPIAIJ"
1435 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1436 {
1437   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1438   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1439   PetscErrorCode ierr;
1440   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1441   Mat            B;
1442   PetscScalar    *array;
1443 
1444   PetscFunctionBegin;
1445   if (!matout && M != N) {
1446     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1447   }
1448 
1449   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1450   ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
1451   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1452   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1453 
1454   /* copy over the A part */
1455   Aloc = (Mat_SeqAIJ*)a->A->data;
1456   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1457   row = a->rstart;
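  /* temporarily shift the diagonal-block column indices to global numbering;
     the shift is undone once the values have been inserted */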
1458   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1459   for (i=0; i<m; i++) {
1460     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1461     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1462   }
1463   aj = Aloc->j;
1464   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1465 
1466   /* copy over the B part */
1467   Aloc = (Mat_SeqAIJ*)a->B->data;
1468   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1469   row  = a->rstart;
1470   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1471   ct   = cols;
1472   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1473   for (i=0; i<m; i++) {
1474     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1475     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1476   }
1477   ierr = PetscFree(ct);CHKERRQ(ierr);
1478   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1479   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1480   if (matout) {
1481     *matout = B;
1482   } else {
1483     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1484   }
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1490 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1491 {
1492   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1493   Mat            a = aij->A,b = aij->B;
1494   PetscErrorCode ierr;
1495   PetscInt       s1,s2,s3;
1496 
1497   PetscFunctionBegin;
1498   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1499   if (rr) {
1500     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1501     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1502     /* Overlap communication with computation. */
1503     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1504   }
1505   if (ll) {
1506     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1507     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1508     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1509   }
1510   /* scale the diagonal block */
1511   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1512 
1513   if (rr) {
1514     /* Do a scatter end and then right scale the off-diagonal block */
1515     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1516     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1517   }
1518 
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 
1523 #undef __FUNCT__
1524 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1525 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1526 {
1527   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1528   PetscErrorCode ierr;
1529 
1530   PetscFunctionBegin;
1531   if (!a->rank) {
1532     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1533   }
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1539 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1540 {
1541   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1542   PetscErrorCode ierr;
1543 
1544   PetscFunctionBegin;
1545   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1546   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1547   PetscFunctionReturn(0);
1548 }
1549 #undef __FUNCT__
1550 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1551 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1552 {
1553   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1554   PetscErrorCode ierr;
1555 
1556   PetscFunctionBegin;
1557   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatEqual_MPIAIJ"
1563 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1564 {
1565   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1566   Mat            a,b,c,d;
1567   PetscTruth     flg;
1568   PetscErrorCode ierr;
1569 
1570   PetscFunctionBegin;
1571   a = matA->A; b = matA->B;
1572   c = matB->A; d = matB->B;
1573 
1574   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1575   if (flg) {
1576     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1577   }
1578   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatCopy_MPIAIJ"
1584 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1585 {
1586   PetscErrorCode ierr;
1587   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1588   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1589 
1590   PetscFunctionBegin;
1591   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1592   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1593     /* because of the column compression in the off-processor part of the matrix a->B,
1594        the number of columns in a->B and b->B may be different, hence we cannot call
1595        MatCopy() directly on the two parts. If need be, we can provide a more
1596        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1597        and then copying the submatrices. */
1598     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1599   } else {
1600     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1601     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1602   }
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 #undef __FUNCT__
1607 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1608 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1609 {
1610   PetscErrorCode ierr;
1611 
1612   PetscFunctionBegin;
1613   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #include "petscblaslapack.h"
1618 #undef __FUNCT__
1619 #define __FUNCT__ "MatAXPY_MPIAIJ"
1620 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1621 {
1622   PetscErrorCode ierr;
1623   PetscInt       i;
1624   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1625   PetscBLASInt   bnz,one=1;
1626   Mat_SeqAIJ     *x,*y;
1627 
1628   PetscFunctionBegin;
1629   if (str == SAME_NONZERO_PATTERN) {
1630     PetscScalar alpha = a;
1631     x = (Mat_SeqAIJ *)xx->A->data;
1632     y = (Mat_SeqAIJ *)yy->A->data;
1633     bnz = (PetscBLASInt)x->nz;
1634     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1635     x = (Mat_SeqAIJ *)xx->B->data;
1636     y = (Mat_SeqAIJ *)yy->B->data;
1637     bnz = (PetscBLASInt)x->nz;
1638     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1639   } else if (str == SUBSET_NONZERO_PATTERN) {
1640     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1641 
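    /* y->xtoy[i] is the position in y's value array of X's i-th off-diagonal
       nonzero; it is cached and rebuilt only when X's off-diagonal part changes */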
1642     x = (Mat_SeqAIJ *)xx->B->data;
1643     y = (Mat_SeqAIJ *)yy->B->data;
1644     if (y->xtoy && y->XtoY != xx->B) {
1645       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1646       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1647     }
1648     if (!y->xtoy) { /* get xtoy */
1649       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1650       y->XtoY = xx->B;
1651     }
1652     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1653   } else {
1654     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1655   }
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1660 
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatConjugate_MPIAIJ"
1663 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1664 {
1665 #if defined(PETSC_USE_COMPLEX)
1666   PetscErrorCode ierr;
1667   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1668 
1669   PetscFunctionBegin;
1670   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1671   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1672 #else
1673   PetscFunctionBegin;
1674 #endif
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 #undef __FUNCT__
1679 #define __FUNCT__ "MatRealPart_MPIAIJ"
1680 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
1681 {
1682   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1683   PetscErrorCode ierr;
1684 
1685   PetscFunctionBegin;
1686   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1687   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
1693 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
1694 {
1695   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1696   PetscErrorCode ierr;
1697 
1698   PetscFunctionBegin;
1699   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1700   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 /* -------------------------------------------------------------------*/
1705 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1706        MatGetRow_MPIAIJ,
1707        MatRestoreRow_MPIAIJ,
1708        MatMult_MPIAIJ,
1709 /* 4*/ MatMultAdd_MPIAIJ,
1710        MatMultTranspose_MPIAIJ,
1711        MatMultTransposeAdd_MPIAIJ,
1712        0,
1713        0,
1714        0,
1715 /*10*/ 0,
1716        0,
1717        0,
1718        MatRelax_MPIAIJ,
1719        MatTranspose_MPIAIJ,
1720 /*15*/ MatGetInfo_MPIAIJ,
1721        MatEqual_MPIAIJ,
1722        MatGetDiagonal_MPIAIJ,
1723        MatDiagonalScale_MPIAIJ,
1724        MatNorm_MPIAIJ,
1725 /*20*/ MatAssemblyBegin_MPIAIJ,
1726        MatAssemblyEnd_MPIAIJ,
1727        0,
1728        MatSetOption_MPIAIJ,
1729        MatZeroEntries_MPIAIJ,
1730 /*25*/ MatZeroRows_MPIAIJ,
1731        0,
1732        0,
1733        0,
1734        0,
1735 /*30*/ MatSetUpPreallocation_MPIAIJ,
1736        0,
1737        0,
1738        0,
1739        0,
1740 /*35*/ MatDuplicate_MPIAIJ,
1741        0,
1742        0,
1743        0,
1744        0,
1745 /*40*/ MatAXPY_MPIAIJ,
1746        MatGetSubMatrices_MPIAIJ,
1747        MatIncreaseOverlap_MPIAIJ,
1748        MatGetValues_MPIAIJ,
1749        MatCopy_MPIAIJ,
1750 /*45*/ MatPrintHelp_MPIAIJ,
1751        MatScale_MPIAIJ,
1752        0,
1753        0,
1754        0,
1755 /*50*/ MatSetBlockSize_MPIAIJ,
1756        0,
1757        0,
1758        0,
1759        0,
1760 /*55*/ MatFDColoringCreate_MPIAIJ,
1761        0,
1762        MatSetUnfactored_MPIAIJ,
1763        MatPermute_MPIAIJ,
1764        0,
1765 /*60*/ MatGetSubMatrix_MPIAIJ,
1766        MatDestroy_MPIAIJ,
1767        MatView_MPIAIJ,
1768        0,
1769        0,
1770 /*65*/ 0,
1771        0,
1772        0,
1773        0,
1774        0,
1775 /*70*/ 0,
1776        0,
1777        MatSetColoring_MPIAIJ,
1778 #if defined(PETSC_HAVE_ADIC)
1779        MatSetValuesAdic_MPIAIJ,
1780 #else
1781        0,
1782 #endif
1783        MatSetValuesAdifor_MPIAIJ,
1784 /*75*/ 0,
1785        0,
1786        0,
1787        0,
1788        0,
1789 /*80*/ 0,
1790        0,
1791        0,
1792        0,
1793 /*84*/ MatLoad_MPIAIJ,
1794        0,
1795        0,
1796        0,
1797        0,
1798        0,
1799 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1800        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1801        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1802        MatPtAP_Basic,
1803        MatPtAPSymbolic_MPIAIJ,
1804 /*95*/ MatPtAPNumeric_MPIAIJ,
1805        0,
1806        0,
1807        0,
1808        0,
1809 /*100*/0,
1810        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1811        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1812        MatConjugate_MPIAIJ,
1813        0,
1814 /*105*/MatSetValuesRow_MPIAIJ,
1815        MatRealPart_MPIAIJ,
1816        MatImaginaryPart_MPIAIJ};
1817 
1818 /* ----------------------------------------------------------------------------------------*/
1819 
1820 EXTERN_C_BEGIN
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1823 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1824 {
1825   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1826   PetscErrorCode ierr;
1827 
1828   PetscFunctionBegin;
1829   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1830   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 EXTERN_C_END
1834 
1835 EXTERN_C_BEGIN
1836 #undef __FUNCT__
1837 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1838 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1839 {
1840   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1841   PetscErrorCode ierr;
1842 
1843   PetscFunctionBegin;
1844   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1845   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1846   PetscFunctionReturn(0);
1847 }
1848 EXTERN_C_END
1849 
1850 #include "petscpc.h"
1851 EXTERN_C_BEGIN
1852 #undef __FUNCT__
1853 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1854 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1855 {
1856   Mat_MPIAIJ     *b;
1857   PetscErrorCode ierr;
1858   PetscInt       i;
1859 
1860   PetscFunctionBegin;
1861   B->preallocated = PETSC_TRUE;
1862   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1863   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1864   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1865   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1866   if (d_nnz) {
1867     for (i=0; i<B->m; i++) {
1868       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1869     }
1870   }
1871   if (o_nnz) {
1872     for (i=0; i<B->m; i++) {
1873       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1874     }
1875   }
1876   b = (Mat_MPIAIJ*)B->data;
1877   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1878   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1879 
1880   PetscFunctionReturn(0);
1881 }
1882 EXTERN_C_END
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1886 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1887 {
1888   Mat            mat;
1889   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1890   PetscErrorCode ierr;
1891 
1892   PetscFunctionBegin;
1893   *newmat       = 0;
1894   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1895   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
1896   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1897   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1898   a    = (Mat_MPIAIJ*)mat->data;
1899 
1900   mat->factor       = matin->factor;
1901   mat->bs           = matin->bs;
1902   mat->assembled    = PETSC_TRUE;
1903   mat->insertmode   = NOT_SET_VALUES;
1904   mat->preallocated = PETSC_TRUE;
1905 
1906   a->rstart       = oldmat->rstart;
1907   a->rend         = oldmat->rend;
1908   a->cstart       = oldmat->cstart;
1909   a->cend         = oldmat->cend;
1910   a->size         = oldmat->size;
1911   a->rank         = oldmat->rank;
1912   a->donotstash   = oldmat->donotstash;
1913   a->roworiented  = oldmat->roworiented;
1914   a->rowindices   = 0;
1915   a->rowvalues    = 0;
1916   a->getrowactive = PETSC_FALSE;
1917 
1918   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1919   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1920   if (oldmat->colmap) {
1921 #if defined (PETSC_USE_CTABLE)
1922     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1923 #else
1924     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1925     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1926     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1927 #endif
1928   } else a->colmap = 0;
1929   if (oldmat->garray) {
1930     PetscInt len;
1931     len  = oldmat->B->n;
1932     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1933     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1934     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1935   } else a->garray = 0;
1936 
1937   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1938   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1939   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1940   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1941   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1942   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1943   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1944   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1945   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1946   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1947   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1948   *newmat = mat;
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #include "petscsys.h"
1953 
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatLoad_MPIAIJ"
1956 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1957 {
1958   Mat            A;
1959   PetscScalar    *vals,*svals;
1960   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1961   MPI_Status     status;
1962   PetscErrorCode ierr;
1963   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1964   PetscInt       i,nz,j,rstart,rend,mmax;
1965   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1966   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1967   PetscInt       cend,cstart,n,*rowners;
1968   int            fd;
1969 
1970   PetscFunctionBegin;
1971   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1972   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1973   if (!rank) {
1974     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1975     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1976     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1977   }
1978 
1979   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1980   M = header[1]; N = header[2];
1981   /* determine ownership of all rows */
1982   m    = M/size + ((M % size) > rank);
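  /* each process gets M/size rows; the first M%size processes take one extra */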
1983   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1984   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1985 
1986   /* First process needs enough room for the process with the most rows */
1987   if (!rank) {
1988     mmax       = rowners[1];
1989     for (i=2; i<size; i++) {
1990       mmax = PetscMax(mmax,rowners[i]);
1991     }
1992   } else mmax = m;
1993 
1994   rowners[0] = 0;
1995   for (i=2; i<=size; i++) {
1996     mmax       = PetscMax(mmax,rowners[i]);
1997     rowners[i] += rowners[i-1];
1998   }
1999   rstart = rowners[rank];
2000   rend   = rowners[rank+1];
2001 
2002   /* distribute row lengths to all processors */
2003   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
2004   if (!rank) {
2005     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
2006     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2007     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2008     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2009     for (j=0; j<m; j++) {
2010       procsnz[0] += ourlens[j];
2011     }
2012     for (i=1; i<size; i++) {
2013       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
2014       /* calculate the number of nonzeros on each processor */
2015       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
2016         procsnz[i] += rowlengths[j];
2017       }
2018       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2019     }
2020     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2021   } else {
2022     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2023   }
2024 
2025   if (!rank) {
2026     /* determine max buffer needed and allocate it */
2027     maxnz = 0;
2028     for (i=0; i<size; i++) {
2029       maxnz = PetscMax(maxnz,procsnz[i]);
2030     }
2031     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2032 
2033     /* read in my part of the matrix column indices  */
2034     nz   = procsnz[0];
2035     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2036     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2037 
2038     /* read in everyone else's and ship off */
2039     for (i=1; i<size; i++) {
2040       nz   = procsnz[i];
2041       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2042       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2043     }
2044     ierr = PetscFree(cols);CHKERRQ(ierr);
2045   } else {
2046     /* determine buffer space needed for message */
2047     nz = 0;
2048     for (i=0; i<m; i++) {
2049       nz += ourlens[i];
2050     }
2051     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
2052 
2053     /* receive message of column indices */
2054     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2055     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2056     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2057   }
2058 
2059   /* determine column ownership if matrix is not square */
2060   if (N != M) {
2061     n      = N/size + ((N % size) > rank);
2062     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2063     cstart = cend - n;
2064   } else {
2065     cstart = rstart;
2066     cend   = rend;
2067     n      = cend - cstart;
2068   }
2069 
2070   /* loop over local rows, determining number of off diagonal entries */
2071   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2072   jj = 0;
2073   for (i=0; i<m; i++) {
2074     for (j=0; j<ourlens[i]; j++) {
2075       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2076       jj++;
2077     }
2078   }
2079 
2080   /* create our matrix */
2081   for (i=0; i<m; i++) {
2082     ourlens[i] -= offlens[i];
2083   }
2084   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2085   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
2086   ierr = MatSetType(A,type);CHKERRQ(ierr);
2087   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2088 
2089   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2090   for (i=0; i<m; i++) {
2091     ourlens[i] += offlens[i];
2092   }
2093 
2094   if (!rank) {
2095     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2096 
2097     /* read in my part of the matrix numerical values  */
2098     nz   = procsnz[0];
2099     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2100 
2101     /* insert into matrix */
2102     jj      = rstart;
2103     smycols = mycols;
2104     svals   = vals;
2105     for (i=0; i<m; i++) {
2106       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2107       smycols += ourlens[i];
2108       svals   += ourlens[i];
2109       jj++;
2110     }
2111 
2112     /* read in other processors and ship out */
2113     for (i=1; i<size; i++) {
2114       nz   = procsnz[i];
2115       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2116       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2117     }
2118     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2119   } else {
2120     /* receive numeric values */
2121     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2122 
2123     /* receive message of values */
2124     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2125     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2126     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2127 
2128     /* insert into matrix */
2129     jj      = rstart;
2130     smycols = mycols;
2131     svals   = vals;
2132     for (i=0; i<m; i++) {
2133       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2134       smycols += ourlens[i];
2135       svals   += ourlens[i];
2136       jj++;
2137     }
2138   }
2139   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2140   ierr = PetscFree(vals);CHKERRQ(ierr);
2141   ierr = PetscFree(mycols);CHKERRQ(ierr);
2142   ierr = PetscFree(rowners);CHKERRQ(ierr);
2143 
2144   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2145   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2146   *newmat = A;
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 #undef __FUNCT__
2151 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2152 /*
2153     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2154   copy, and then the end result formed by concatenating the local matrices.
2155   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2156 */
2157 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2158 {
2159   PetscErrorCode ierr;
2160   PetscMPIInt    rank,size;
2161   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2162   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2163   Mat            *local,M,Mreuse;
2164   PetscScalar    *vwork,*aa;
2165   MPI_Comm       comm = mat->comm;
2166   Mat_SeqAIJ     *aij;
2167 
2168 
2169   PetscFunctionBegin;
2170   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2171   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2172 
2173   if (call ==  MAT_REUSE_MATRIX) {
2174     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2175     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2176     local = &Mreuse;
2177     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2178   } else {
2179     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2180     Mreuse = *local;
2181     ierr   = PetscFree(local);CHKERRQ(ierr);
2182   }
2183 
2184   /*
2185       m - number of local rows
2186       n - number of columns (same on all processors)
2187       rstart - first row in new global matrix generated
2188   */
2189   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2190   if (call == MAT_INITIAL_MATRIX) {
2191     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2192     ii  = aij->i;
2193     jj  = aij->j;
2194 
2195     /*
2196         Determine the number of non-zeros in the diagonal and off-diagonal
2197         portions of the matrix in order to do correct preallocation
2198     */
2199 
2200     /* first get start and end of "diagonal" columns */
2201     if (csize == PETSC_DECIDE) {
2202       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2203       if (mglobal == n) { /* square matrix */
2204         nlocal = m;
2205       } else {
2206         nlocal = n/size + ((n % size) > rank);
2207       }
2208     } else {
2209       nlocal = csize;
2210     }
2211     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2212     rstart = rend - nlocal;
2213     if (rank == size - 1 && rend != n) {
2214       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2215     }
2216 
2217     /* next, compute all the lengths */
2218     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2219     olens = dlens + m;
2220     for (i=0; i<m; i++) {
2221       jend = ii[i+1] - ii[i];
2222       olen = 0;
2223       dlen = 0;
2224       for (j=0; j<jend; j++) {
2225         if (*jj < rstart || *jj >= rend) olen++;
2226         else dlen++;
2227         jj++;
2228       }
2229       olens[i] = olen;
2230       dlens[i] = dlen;
2231     }
2232     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2233     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2234     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2235     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2236     ierr = PetscFree(dlens);CHKERRQ(ierr);
2237   } else {
2238     PetscInt ml,nl;
2239 
2240     M = *newmat;
2241     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2242     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2243     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2244     /*
2245          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2246        rather than the slower MatSetValues().
2247     */
2248     M->was_assembled = PETSC_TRUE;
2249     M->assembled     = PETSC_FALSE;
2250   }
2251   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2252   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2253   ii  = aij->i;
2254   jj  = aij->j;
2255   aa  = aij->a;
2256   for (i=0; i<m; i++) {
2257     row   = rstart + i;
2258     nz    = ii[i+1] - ii[i];
2259     cwork = jj;     jj += nz;
2260     vwork = aa;     aa += nz;
2261     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2262   }
2263 
2264   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2265   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2266   *newmat = M;
2267 
2268   /* save submatrix used in processor for next request */
2269   if (call ==  MAT_INITIAL_MATRIX) {
2270     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2271     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2272   }
2273 
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 EXTERN_C_BEGIN
2278 #undef __FUNCT__
2279 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2280 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2281 {
2282   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2283   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2284   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2285   const PetscInt *JJ;
2286   PetscScalar    *values;
2287   PetscErrorCode ierr;
2288 
2289   PetscFunctionBegin;
2290   if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0, it is %D",I[0]);
2291   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2292   o_nnz = d_nnz + m;
2293 
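  /* for each local row count the columns that fall in [cstart,cend) as
     diagonal-block entries; everything else is off-diagonal (columns are
     assumed sorted within each row) */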
2294   for (i=0; i<m; i++) {
2295     nnz     = I[i+1]- I[i];
2296     JJ      = J + I[i];
2297     nnz_max = PetscMax(nnz_max,nnz);
2298     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2299     for (j=0; j<nnz; j++) {
2300       if (*JJ >= cstart) break;
2301       JJ++;
2302     }
2303     d = 0;
2304     for (; j<nnz; j++) {
2305       if (*JJ++ >= cend) break;
2306       d++;
2307     }
2308     d_nnz[i] = d;
2309     o_nnz[i] = nnz - d;
2310   }
2311   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2312   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2313 
2314   if (v) values = (PetscScalar*)v;
2315   else {
2316     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2317     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2318   }
2319 
2320   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2321   for (i=0; i<m; i++) {
2322     ii   = i + rstart;
2323     nnz  = I[i+1]- I[i];
2324     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2325   }
2326   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2327   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2328   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2329 
2330   if (!v) {
2331     ierr = PetscFree(values);CHKERRQ(ierr);
2332   }
2333   PetscFunctionReturn(0);
2334 }
2335 EXTERN_C_END
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2339 /*@
2340    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2341    (the default parallel PETSc format).
2342 
2343    Collective on MPI_Comm
2344 
2345    Input Parameters:
2346 +  B - the matrix
2347 .  i - the indices into j for the start of each local row (starts with zero)
2348 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2349 -  v - optional values in the matrix
2350 
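   Example usage (a minimal sketch; the sizes and values are made up, and B is
   assumed to already have its type and local sizes set):
.vb
      PetscInt    i[] = {0,2,3};     /* two local rows with 2 and 1 entries */
      PetscInt    j[] = {0,1,1};     /* sorted global column indices */
      PetscScalar v[] = {1.0,2.0,3.0};
      ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
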
2351    Level: developer
2352 
2353 .keywords: matrix, aij, compressed row, sparse, parallel
2354 
2355 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2356 @*/
2357 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2358 {
2359   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2360 
2361   PetscFunctionBegin;
2362   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2363   if (f) {
2364     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2365   }
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2371 /*@C
2372    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2373    (the default parallel PETSc format).  For good matrix assembly performance
2374    the user should preallocate the matrix storage by setting the parameters
2375    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2376    performance can be increased by more than a factor of 50.
2377 
2378    Collective on MPI_Comm
2379 
2380    Input Parameters:
2381 +  A - the matrix
2382 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2383            (same value is used for all local rows)
2384 .  d_nnz - array containing the number of nonzeros in the various rows of the
2385            DIAGONAL portion of the local submatrix (possibly different for each row)
2386            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2387            The size of this array is equal to the number of local rows, i.e 'm'.
2388            You must leave room for the diagonal entry even if it is zero.
2389 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2390            submatrix (same value is used for all local rows).
2391 -  o_nnz - array containing the number of nonzeros in the various rows of the
2392            OFF-DIAGONAL portion of the local submatrix (possibly different for
2393            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2394            structure. The size of this array is equal to the number
2395            of local rows, i.e 'm'.
2396 
2397    If the *_nnz parameter is given then the *_nz parameter is ignored
2398 
2399    The AIJ format (also called the Yale sparse matrix format or
2400    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2401    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2402 
2403    The parallel matrix is partitioned such that the first m0 rows belong to
2404    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2405    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
2406 
2407    The DIAGONAL portion of the local submatrix of a processor can be defined
2408    as the submatrix which is obtained by extracting the part corresponding
2409    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2410    first row that belongs to the processor, and r2 is the last row belonging
2411    to this processor. This is a square mxm matrix. The remaining portion
2412    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2413 
2414    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2415 
2416    Example usage:
2417 
2418    Consider the following 8x8 matrix with 34 non-zero values, that is
2419    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2420    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2421    as follows:
2422 
2423 .vb
2424             1  2  0  |  0  3  0  |  0  4
2425     Proc0   0  5  6  |  7  0  0  |  8  0
2426             9  0 10  | 11  0  0  | 12  0
2427     -------------------------------------
2428            13  0 14  | 15 16 17  |  0  0
2429     Proc1   0 18  0  | 19 20 21  |  0  0
2430             0  0  0  | 22 23  0  | 24  0
2431     -------------------------------------
2432     Proc2  25 26 27  |  0  0 28  | 29  0
2433            30  0  0  | 31 32 33  |  0 34
2434 .ve
2435 
2436    This can be represented as a collection of submatrices as:
2437 
2438 .vb
2439       A B C
2440       D E F
2441       G H I
2442 .ve
2443 
2444    Where the submatrices A,B,C are owned by proc0, D,E,F are
2445    owned by proc1, G,H,I are owned by proc2.
2446 
2447    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2448    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2449    The 'M','N' parameters are 8,8, and have the same values on all procs.
2450 
2451    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2452    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2453    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2454    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2455    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2456    matrix, and [DF] as another SeqAIJ matrix.
2457 
2458    When d_nz, o_nz parameters are specified, d_nz storage elements are
2459    allocated for every row of the local diagonal submatrix, and o_nz
2460    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2461    One way to choose d_nz and o_nz is to use the max nonzeros per local
2462    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2463    In this case, the values of d_nz,o_nz are:
2464 .vb
2465      proc0 : dnz = 2, o_nz = 2
2466      proc1 : dnz = 3, o_nz = 2
2467      proc2 : dnz = 1, o_nz = 4
2468 .ve
2469    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2470    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2471    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2472    34 values.
2473 
2474    When d_nnz, o_nnz parameters are specified, the storage is specified
2475    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2476    In the above case the values for d_nnz,o_nnz are:
2477 .vb
2478      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2479      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2480      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2481 .ve
2482    Here the space allocated is the sum of all the above values, i.e. 34, and
2483    hence pre-allocation is perfect.
2484 
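   For proc1 in the example above, the call would look like the following
   sketch (A is assumed to have been created with 3 local rows and type MPIAIJ):
.vb
      PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
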
2485    Level: intermediate
2486 
2487 .keywords: matrix, aij, compressed row, sparse, parallel
2488 
2489 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2490           MPIAIJ
2491 @*/
2492 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2493 {
2494   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2495 
2496   PetscFunctionBegin;
2497   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2498   if (f) {
2499     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2500   }
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatCreateMPIAIJ"
2506 /*@C
2507    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2508    (the default parallel PETSc format).  For good matrix assembly performance
2509    the user should preallocate the matrix storage by setting the parameters
2510    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2511    performance can be increased by more than a factor of 50.
2512 
2513    Collective on MPI_Comm
2514 
2515    Input Parameters:
2516 +  comm - MPI communicator
2517 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2518            This value should be the same as the local size used in creating the
2519            y vector for the matrix-vector product y = Ax.
2520 .  n - This value should be the same as the local size used in creating the
2521        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2522        calculated if N is given) For square matrices n is almost always m.
2523 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2524 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2525 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2526            (same value is used for all local rows)
2527 .  d_nnz - array containing the number of nonzeros in the various rows of the
2528            DIAGONAL portion of the local submatrix (possibly different for each row)
2529            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2530            The size of this array is equal to the number of local rows, i.e 'm'.
2531            You must leave room for the diagonal entry even if it is zero.
2532 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2533            submatrix (same value is used for all local rows).
2534 -  o_nnz - array containing the number of nonzeros in the various rows of the
2535            OFF-DIAGONAL portion of the local submatrix (possibly different for
2536            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2537            structure. The size of this array is equal to the number
2538            of local rows, i.e 'm'.
2539 
2540    Output Parameter:
2541 .  A - the matrix
2542 
2543    Notes:
2544    If the *_nnz parameter is given then the *_nz parameter is ignored
2545 
2546    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2547    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2548    storage requirements for this matrix.
2549 
2550    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2551    processor then it must be used on all processors that share the object for
2552    that argument.
2553 
2554    The user MUST specify either the local or global matrix dimensions
2555    (possibly both).
2556 
2557    The parallel matrix is partitioned such that the first m0 rows belong to
2558    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2559    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
2560 
2561    The DIAGONAL portion of the local submatrix of a processor can be defined
2562    as the submatrix which is obtained by extracting the part corresponding
2563    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2564    first row that belongs to the processor, and r2 is the last row belonging
2565    to this processor. This is a square mxm matrix. The remaining portion
2566    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2567 
2568    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2569 
2570    When calling this routine with a single process communicator, a matrix of
2571    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2572    type of communicator, use the construction mechanism:
2573      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2574 
2575    By default, this format uses inodes (identical nodes) when possible.
2576    We search for consecutive rows with the same nonzero structure, thereby
2577    reusing matrix information to achieve increased efficiency.
2578 
2579    Options Database Keys:
2580 +  -mat_no_inode  - Do not use inodes
2581 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2582 -  -mat_aij_oneindex - Internally use indexing starting at 1
2583         rather than 0.  Note that when calling MatSetValues(),
2584         the user still MUST index entries starting at 0!
2585 
2586 
2587    Example usage:
2588 
2589    Consider the following 8x8 matrix with 34 non-zero values, that is
2590    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2591    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2592    as follows:
2593 
2594 .vb
2595             1  2  0  |  0  3  0  |  0  4
2596     Proc0   0  5  6  |  7  0  0  |  8  0
2597             9  0 10  | 11  0  0  | 12  0
2598     -------------------------------------
2599            13  0 14  | 15 16 17  |  0  0
2600     Proc1   0 18  0  | 19 20 21  |  0  0
2601             0  0  0  | 22 23  0  | 24  0
2602     -------------------------------------
2603     Proc2  25 26 27  |  0  0 28  | 29  0
2604            30  0  0  | 31 32 33  |  0 34
2605 .ve
2606 
2607    This can be represented as a collection of submatrices as:
2608 
2609 .vb
2610       A B C
2611       D E F
2612       G H I
2613 .ve
2614 
2615    Where the submatrices A,B,C are owned by proc0, D,E,F are
2616    owned by proc1, G,H,I are owned by proc2.
2617 
2618    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2619    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2620    The 'M','N' parameters are 8,8, and have the same values on all procs.
2621 
2622    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2623    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2624    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2625    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2626    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2627    matrix, and [DF] as another SeqAIJ matrix.
2628 
2629    When d_nz, o_nz parameters are specified, d_nz storage elements are
2630    allocated for every row of the local diagonal submatrix, and o_nz
2631    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2632    One way to choose d_nz and o_nz is to use the max nonzeros per local
2633    row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2634    In this case, the values of d_nz,o_nz are:
2635 .vb
2636      proc0 : dnz = 2, o_nz = 2
2637      proc1 : dnz = 3, o_nz = 2
2638      proc2 : dnz = 1, o_nz = 4
2639 .ve
2640    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2641    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2642    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2643    34 values.
2644 
2645    When d_nnz, o_nnz parameters are specified, the storage is specified
2646    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2647    In the above case the values for d_nnz,o_nnz are:
2648 .vb
2649      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2650      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2651      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2652 .ve
2653    Here the space allocated is the sum of all the above values, i.e. 34, and
2654    hence pre-allocation is perfect.
2655 
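   For proc1 in the example above, the creation call would look like the
   following sketch (comm, d_nnz, and o_nnz are assumed to be set up as shown):
.vb
      Mat      A;
      PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
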
2656    Level: intermediate
2657 
2658 .keywords: matrix, aij, compressed row, sparse, parallel
2659 
2660 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2661           MPIAIJ
2662 @*/
2663 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2664 {
2665   PetscErrorCode ierr;
2666   PetscMPIInt    size;
2667 
2668   PetscFunctionBegin;
2669   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2670   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2671   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2672   if (size > 1) {
2673     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2674     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2675   } else {
2676     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2677     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2678   }
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 #undef __FUNCT__
2683 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
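/* Returns the diagonal block (Ad), the off-diagonal block (Ao), and the map
   from Ao's compressed column numbers to global column numbers (colmap) */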
2684 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2685 {
2686   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2687 
2688   PetscFunctionBegin;
2689   *Ad     = a->A;
2690   *Ao     = a->B;
2691   *colmap = a->garray;
2692   PetscFunctionReturn(0);
2693 }
2694 
2695 #undef __FUNCT__
2696 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2697 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2698 {
2699   PetscErrorCode ierr;
2700   PetscInt       i;
2701   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2702 
2703   PetscFunctionBegin;
2704   if (coloring->ctype == IS_COLORING_LOCAL) {
2705     ISColoringValue *allcolors,*colors;
2706     ISColoring      ocoloring;
2707 
2708     /* set coloring for diagonal portion */
2709     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2710 
2711     /* set coloring for off-diagonal portion */
2712     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2713     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2714     for (i=0; i<a->B->n; i++) {
2715       colors[i] = allcolors[a->garray[i]];
2716     }
2717     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2718     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
2719     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2720     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2721   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2722     ISColoringValue *colors;
2723     PetscInt        *larray;
2724     ISColoring      ocoloring;
2725 
2726     /* set coloring for diagonal portion */
2727     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2728     for (i=0; i<a->A->n; i++) {
2729       larray[i] = i + a->cstart;
2730     }
2731     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2732     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2733     for (i=0; i<a->A->n; i++) {
2734       colors[i] = coloring->colors[larray[i]];
2735     }
2736     ierr = PetscFree(larray);CHKERRQ(ierr);
2737     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
2738     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2739     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2740 
2741     /* set coloring for off-diagonal portion */
2742     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2743     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2744     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2745     for (i=0; i<a->B->n; i++) {
2746       colors[i] = coloring->colors[larray[i]];
2747     }
2748     ierr = PetscFree(larray);CHKERRQ(ierr);
2749     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,coloring->n,colors,&ocoloring);CHKERRQ(ierr);
2750     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2751     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2752   } else {
2753     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2754   }
2755 
2756   PetscFunctionReturn(0);
2757 }
2758 
2759 #if defined(PETSC_HAVE_ADIC)
2760 #undef __FUNCT__
2761 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2762 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2763 {
2764   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2765   PetscErrorCode ierr;
2766 
2767   PetscFunctionBegin;
2768   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2769   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2770   PetscFunctionReturn(0);
2771 }
2772 #endif
2773 
2774 #undef __FUNCT__
2775 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2776 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2777 {
2778   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2779   PetscErrorCode ierr;
2780 
2781   PetscFunctionBegin;
2782   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2783   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2784   PetscFunctionReturn(0);
2785 }
2786 
2787 #undef __FUNCT__
2788 #define __FUNCT__ "MatMerge"
2789 /*@C
2790       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2791                  matrices from each processor
2792 
2793     Collective on MPI_Comm
2794 
2795    Input Parameters:
2796 +    comm - the communicator the parallel matrix will live on
2797 .    inmat - the input sequential matrix (one per process)
2798 .    n - number of local columns (or PETSC_DECIDE)
2799 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2800 
2801    Output Parameter:
2802 .    outmat - the parallel matrix generated
2803 
2804     Level: advanced
2805 
2806    Notes: The number of columns of the matrix MUST be the same on EACH processor; the input matrix inmat is destroyed by this routine.
2807 
2808 @*/
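/*
   Example usage (a minimal sketch, not taken from an actual PETSc example;
   the per-process assembly of `local' is assumed to happen elsewhere):

      Mat local,global;
      ...  each process builds and assembles its own SeqAIJ matrix `local',
           all with the same number of columns  ...
      ierr = MatMerge(PETSC_COMM_WORLD,local,PETSC_DECIDE,MAT_INITIAL_MATRIX,&global);CHKERRQ(ierr);

   Note that MatMerge() destroys the input matrix `local', so it must not be
   referenced after the call.
*/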
2809 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2810 {
2811   PetscErrorCode ierr;
2812   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2813   PetscInt       *indx;
2814   PetscScalar    *values;
2815 
2816   PetscFunctionBegin;
2817   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2818   if (scall == MAT_INITIAL_MATRIX){
2819     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2820     if (n == PETSC_DECIDE){
2821       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
2822     }
2823     ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2824     rstart -= m;
2825 
2826     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2827     for (i=0;i<m;i++) {
2828       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2829       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2830       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2831     }
2832     /* This routine will ONLY return MPIAIJ type matrix */
2833     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2834     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2835     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2836     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2837     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2838 
2839   } else if (scall == MAT_REUSE_MATRIX){
2840     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2841   } else {
2842     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2843   }
2844 
2845   for (i=0;i<m;i++) {
2846     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2847     I    = i + rstart;
2848     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2849     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2850   }
2851   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2852   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2853   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2854 
2855   PetscFunctionReturn(0);
2856 }
2857 
2858 #undef __FUNCT__
2859 #define __FUNCT__ "MatFileSplit"
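/*
   MatFileSplit - Writes each process's local rows of the parallel matrix A,
   stored as a SeqAIJ matrix that keeps the full global number of columns,
   to a separate binary file <outfile>.<rank> (one file per process).
*/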
2860 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2861 {
2862   PetscErrorCode    ierr;
2863   PetscMPIInt       rank;
2864   PetscInt          m,N,i,rstart,nnz;
2865   size_t            len;
2866   const PetscInt    *indx;
2867   PetscViewer       out;
2868   char              *name;
2869   Mat               B;
2870   const PetscScalar *values;
2871 
2872   PetscFunctionBegin;
2873   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2874   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2875   /* Should this be the type of the diagonal block of A? */
2876   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2877   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2878   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2879   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2880   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2881   for (i=0;i<m;i++) {
2882     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2883     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2884     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2885   }
2886   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2887   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2888 
2889   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2890   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2891   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for ".<rank>" even with large ranks */
2892   sprintf(name,"%s.%d",outfile,rank);
2893   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
2894   ierr = PetscFree(name);CHKERRQ(ierr);
2895   ierr = MatView(B,out);CHKERRQ(ierr);
2896   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2897   ierr = MatDestroy(B);CHKERRQ(ierr);
2898   PetscFunctionReturn(0);
2899 }
2900 
2901 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2902 #undef __FUNCT__
2903 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2904 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2905 {
2906   PetscErrorCode       ierr;
2907   Mat_Merge_SeqsToMPI  *merge;
2908   PetscObjectContainer container;
2909 
2910   PetscFunctionBegin;
2911   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2912   if (container) {
2913     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2914     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2915     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2916     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2917     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2918     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2919     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2920     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2921     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2922     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2923     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2924 
2925     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2926     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2927     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container exists */
2928   }
2929 
2930   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2931   PetscFunctionReturn(0);
2932 }
2933 
2934 #include "src/mat/utils/freespace.h"
2935 #include "petscbt.h"
2936 static PetscEvent logkey_seqstompinum = 0;
2937 #undef __FUNCT__
2938 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2939 /*@C
2940       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2941                  matrices from each processor
2942 
2943     Collective on MPI_Comm
2944 
2945    Input Parameters:
2946 +    comm - the communicator the parallel matrix will live on
2947 .    seqmat - the input sequential matrix (one per process)
2948 .    m - number of local rows (or PETSC_DECIDE)
2949 .    n - number of local columns (or PETSC_DECIDE)
2950 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2951 
2952    Output Parameter:
2953 .    mpimat - the parallel matrix generated
2954 
2955     Level: advanced
2956 
2957    Notes:
2958      The dimensions of the sequential matrix in each processor MUST be the same.
2959      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2960      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2961 @*/
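/*
   Example usage (a minimal sketch): the two phases below may be called through
   the driver MatMerge_SeqsToMPI(), or separately when the symbolic work is to
   be reused for several matrices with the same nonzero pattern:

      Mat seqmat,mpimat;
      ierr = MatMerge_SeqsToMPISymbolic(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,&mpimat);CHKERRQ(ierr);
      ierr = MatMerge_SeqsToMPINumeric(seqmat,mpimat);CHKERRQ(ierr);
*/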
2962 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2963 {
2964   PetscErrorCode       ierr;
2965   MPI_Comm             comm=mpimat->comm;
2966   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2967   PetscMPIInt          size,rank,taga,*len_s;
2968   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2969   PetscInt             proc,m;
2970   PetscInt             **buf_ri,**buf_rj;
2971   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2972   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2973   MPI_Request          *s_waits,*r_waits;
2974   MPI_Status           *status;
2975   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2976   Mat_Merge_SeqsToMPI  *merge;
2977   PetscObjectContainer container;
2978 
2979   PetscFunctionBegin;
2980   if (!logkey_seqstompinum) {
2981     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2982   }
2983   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2984 
2985   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2986   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2987 
2988   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2989   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix mpimat does not have the container MatMergeSeqsToMPI; call MatMerge_SeqsToMPISymbolic() first");
2990   ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2992   bi     = merge->bi;
2993   bj     = merge->bj;
2994   buf_ri = merge->buf_ri;
2995   buf_rj = merge->buf_rj;
2996 
2997   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2998   owners = merge->rowmap.range;
2999   len_s  = merge->len_s;
3000 
3001   /* send and recv matrix values */
3002   /*-----------------------------*/
3003   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
3004   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
3005 
3006   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
3007   for (proc=0,k=0; proc<size; proc++){
3008     if (!len_s[proc]) continue;
3009     i = owners[proc];
3010     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
3011     k++;
3012   }
3013 
3014   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
3015   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
3016   ierr = PetscFree(status);CHKERRQ(ierr);
3017 
3018   ierr = PetscFree(s_waits);CHKERRQ(ierr);
3019   ierr = PetscFree(r_waits);CHKERRQ(ierr);
3020 
3021   /* insert mat values of mpimat */
3022   /*----------------------------*/
3023   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
3024   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3025   nextrow = buf_ri_k + merge->nrecv;
3026   nextai  = nextrow + merge->nrecv;
3027 
3028   for (k=0; k<merge->nrecv; k++){
3029     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3030     nrows = *(buf_ri_k[k]);
3031     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
3032     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the offsets of the k-th recved i-structure */
3033   }
3034 
3035   /* set values of ba */
3036   m = merge->rowmap.n;
3037   for (i=0; i<m; i++) {
3038     arow = owners[rank] + i;
3039     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
3040     bnzi = bi[i+1] - bi[i];
3041     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
3042 
3043     /* add local non-zero vals of this proc's seqmat into ba */
3044     anzi = ai[arow+1] - ai[arow];
3045     aj   = a->j + ai[arow];
3046     aa   = a->a + ai[arow];
3047     nextaj = 0;
3048     for (j=0; nextaj<anzi; j++){
3049       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3050         ba_i[j] += aa[nextaj++];
3051       }
3052     }
3053 
3054     /* add received vals into ba */
3055     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3056       /* i-th row */
3057       if (i == *nextrow[k]) {
3058         anzi = *(nextai[k]+1) - *nextai[k];
3059         aj   = buf_rj[k] + *(nextai[k]);
3060         aa   = abuf_r[k] + *(nextai[k]);
3061         nextaj = 0;
3062         for (j=0; nextaj<anzi; j++){
3063           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3064             ba_i[j] += aa[nextaj++];
3065           }
3066         }
3067         nextrow[k]++; nextai[k]++;
3068       }
3069     }
3070     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3071   }
3072   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3073   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3074 
3075   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3076   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3077   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3078   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3079   PetscFunctionReturn(0);
3080 }
3081 
3082 static PetscEvent logkey_seqstompisym = 0;
3083 #undef __FUNCT__
3084 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3085 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3086 {
3087   PetscErrorCode       ierr;
3088   Mat                  B_mpi;
3089   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3090   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3091   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3092   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3093   PetscInt             len,proc,*dnz,*onz;
3094   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3095   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3096   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3097   MPI_Status           *status;
3098   PetscFreeSpaceList   free_space=PETSC_NULL,current_space=PETSC_NULL;
3099   PetscBT              lnkbt;
3100   Mat_Merge_SeqsToMPI  *merge;
3101   PetscObjectContainer container;
3102 
3103   PetscFunctionBegin;
3104   if (!logkey_seqstompisym) {
3105     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3106   }
3107   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3108 
3109   /* make sure it is a PETSc comm */
3110   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3111   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3112   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3113 
3114   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3115   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3116 
3117   /* determine row ownership */
3118   /*---------------------------------------------------------*/
3119   ierr = PetscMapInitialize(comm,m,M,&merge->rowmap);CHKERRQ(ierr);
3120   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3121   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3122 
3123   m      = merge->rowmap.n;
3124   M      = merge->rowmap.N;
3125   owners = merge->rowmap.range;
3126 
3127   /* determine the number of messages to send, their lengths */
3128   /*---------------------------------------------------------*/
3129   len_s  = merge->len_s;
3130 
3131   len = 0;  /* length of buf_si[] */
3132   merge->nsend = 0;
3133   for (proc=0; proc<size; proc++){
3134     len_si[proc] = 0;
3135     if (proc == rank){
3136       len_s[proc] = 0;
3137     } else {
3138       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3139       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
3140     }
3141     if (len_s[proc]) {
3142       merge->nsend++;
3143       nrows = 0;
3144       for (i=owners[proc]; i<owners[proc+1]; i++){
3145         if (ai[i+1] > ai[i]) nrows++;
3146       }
3147       len_si[proc] = 2*(nrows+1);
3148       len += len_si[proc];
3149     }
3150   }
3151 
3152   /* determine the number and length of messages to receive for ij-structure */
3153   /*-------------------------------------------------------------------------*/
3154   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3155   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3156 
3157   /* post the Irecv of j-structure */
3158   /*-------------------------------*/
3159   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); /* *mpimat does not exist yet, so take the tag from the duplicated comm */
3160   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3161 
3162   /* post the Isend of j-structure */
3163   /*--------------------------------*/
3164   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3165   sj_waits = si_waits + merge->nsend;
3166 
3167   for (proc=0, k=0; proc<size; proc++){
3168     if (!len_s[proc]) continue;
3169     i = owners[proc];
3170     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3171     k++;
3172   }
3173 
3174   /* receives and sends of j-structure are complete */
3175   /*------------------------------------------------*/
3176   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3177   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3178 
3179   /* send and recv i-structure */
3180   /*---------------------------*/
3181   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); /* merge is a plain struct, not a PetscObject, so take the tag from the comm */
3182   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3183 
3184   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3185   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3186   for (proc=0,k=0; proc<size; proc++){
3187     if (!len_s[proc]) continue;
3188     /* form outgoing message for i-structure:
3189          buf_si[0]:                 nrows to be sent
3190                [1:nrows]:           row index (local to the receiving process)
3191                [nrows+1:2*nrows+1]: i-structure index
3192     */
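    /* For example, if only local rows 4 and 7 of [proc] are nonempty, with
       3 and 5 nonzeros respectively, then the outgoing message is
           buf_si = {2, 4, 7, 0, 3, 8}
       i.e. nrows, the nonempty row indices, then the CSR-style offsets. */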
3193     /*-------------------------------------------*/
3194     nrows = len_si[proc]/2 - 1;
3195     buf_si_i    = buf_si + nrows+1;
3196     buf_si[0]   = nrows;
3197     buf_si_i[0] = 0;
3198     nrows = 0;
3199     for (i=owners[proc]; i<owners[proc+1]; i++){
3200       anzi = ai[i+1] - ai[i];
3201       if (anzi) {
3202         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3203         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3204         nrows++;
3205       }
3206     }
3207     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3208     k++;
3209     buf_si += len_si[proc];
3210   }
3211 
3212   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3213   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3214 
3215   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
3216   for (i=0; i<merge->nrecv; i++){
3217     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
3218   }
3219 
3220   ierr = PetscFree(len_si);CHKERRQ(ierr);
3221   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3222   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3223   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3224   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3225   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3226   ierr = PetscFree(status);CHKERRQ(ierr);
3227 
3228   /* compute a local seq matrix in each processor */
3229   /*----------------------------------------------*/
3230   /* allocate bi array and free space for accumulating nonzero column info */
3231   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3232   bi[0] = 0;
3233 
3234   /* create and initialize a linked list */
3235   nlnk = N+1;
3236   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3237 
3238   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3239   len  = ai[owners[rank+1]] - ai[owners[rank]];
3241   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3242   current_space = free_space;
3243 
3244   /* determine symbolic info for each local row */
3245   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3246   nextrow = buf_ri_k + merge->nrecv;
3247   nextai  = nextrow + merge->nrecv;
3248   for (k=0; k<merge->nrecv; k++){
3249     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3250     nrows = *buf_ri_k[k];
3251     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3252     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the offsets of the k-th recved i-structure */
3253   }
3254 
3255   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3256   len = 0;
3257   for (i=0;i<m;i++) {
3258     bnzi   = 0;
3259     /* add local non-zero cols of this proc's seqmat into lnk */
3260     arow   = owners[rank] + i;
3261     anzi   = ai[arow+1] - ai[arow];
3262     aj     = a->j + ai[arow];
3263     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3264     bnzi += nlnk;
3265     /* add received col data into lnk */
3266     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3267       if (i == *nextrow[k]) { /* i-th row */
3268         anzi = *(nextai[k]+1) - *nextai[k];
3269         aj   = buf_rj[k] + *nextai[k];
3270         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3271         bnzi += nlnk;
3272         nextrow[k]++; nextai[k]++;
3273       }
3274     }
3275     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3276 
3277     /* if free space is not available, make more free space */
3278     if (current_space->local_remaining<bnzi) {
3279       ierr = PetscFreeSpaceGet(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3280       nspacedouble++;
3281     }
3282     /* copy data into free space, then initialize lnk */
3283     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3284     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3285 
3286     current_space->array           += bnzi;
3287     current_space->local_used      += bnzi;
3288     current_space->local_remaining -= bnzi;
3289 
3290     bi[i+1] = bi[i] + bnzi;
3291   }
3292 
3293   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3294 
3295   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3296   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3297   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3298 
3299   /* create symbolic parallel matrix B_mpi */
3300   /*---------------------------------------*/
3301   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3302   if (n==PETSC_DECIDE) {
3303     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3304   } else {
3305     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3306   }
3307   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3308   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3309   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3310 
3311   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3312   B_mpi->assembled     = PETSC_FALSE;
3313   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3314   merge->bi            = bi;
3315   merge->bj            = bj;
3316   merge->buf_ri        = buf_ri;
3317   merge->buf_rj        = buf_rj;
3318   merge->coi           = PETSC_NULL;
3319   merge->coj           = PETSC_NULL;
3320   merge->owners_co     = PETSC_NULL;
3321 
3322   /* attach the supporting struct to B_mpi for reuse */
3323   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3324   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3325   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3326   *mpimat = B_mpi;
3327 
3328   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3329   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3330   PetscFunctionReturn(0);
3331 }
3332 
3333 static PetscEvent logkey_seqstompi = 0;
3334 #undef __FUNCT__
3335 #define __FUNCT__ "MatMerge_SeqsToMPI"
3336 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3337 {
3338   PetscErrorCode   ierr;
3339 
3340   PetscFunctionBegin;
3341   if (!logkey_seqstompi) {
3342     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3343   }
3344   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3345   if (scall == MAT_INITIAL_MATRIX){
3346     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3347   }
3348   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3349   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3350   PetscFunctionReturn(0);
3351 }
3352 static PetscEvent logkey_getlocalmat = 0;
3353 #undef __FUNCT__
3354 #define __FUNCT__ "MatGetLocalMat"
3355 /*@C
3356      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of the parallel matrix A
3357 
3358     Not Collective
3359 
3360    Input Parameters:
3361 +    A - the matrix
3362 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3363 
3364    Output Parameter:
3365 .    A_loc - the local sequential matrix generated
3366 
3367     Level: developer
3368 
3369 @*/
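/*
   Example usage (a minimal sketch, assuming A is an assembled MPIAIJ matrix):

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ...  A_loc has A's local rows and the full global column space  ...
      ...  after A's numerical values change  ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/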
3370 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3371 {
3372   PetscErrorCode  ierr;
3373   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3374   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3375   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3376   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3377   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3378   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3379 
3380   PetscFunctionBegin;
3381   if (!logkey_getlocalmat) {
3382     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3383   }
3384   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3385   if (scall == MAT_INITIAL_MATRIX){
3386     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3387     ci[0] = 0;
3388     for (i=0; i<am; i++){
3389       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3390     }
3391     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3392     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3393     k = 0;
3394     for (i=0; i<am; i++) {
3395       ncols_o = bi[i+1] - bi[i];
3396       ncols_d = ai[i+1] - ai[i];
3397       /* off-diagonal portion of A */
3398       for (jo=0; jo<ncols_o; jo++) {
3399         col = cmap[*bj];
3400         if (col >= cstart) break;
3401         cj[k]   = col; bj++;
3402         ca[k++] = *ba++;
3403       }
3404       /* diagonal portion of A */
3405       for (j=0; j<ncols_d; j++) {
3406         cj[k]   = cstart + *aj++;
3407         ca[k++] = *aa++;
3408       }
3409       /* off-diagonal portion of A */
3410       for (j=jo; j<ncols_o; j++) {
3411         cj[k]   = cmap[*bj++];
3412         ca[k++] = *ba++;
3413       }
3414     }
3415     /* put together the new matrix */
3416     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3417     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3418     /* Since these are PETSc arrays, change flags to free them as necessary. */
3419     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3420     mat->freedata = PETSC_TRUE;
3421     mat->nonew    = 0;
3422   } else if (scall == MAT_REUSE_MATRIX){
3423     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3424     ci = mat->i; cj = mat->j; ca = mat->a;
3425     for (i=0; i<am; i++) {
3426       /* off-diagonal portion of A */
3427       ncols_o = bi[i+1] - bi[i];
3428       for (jo=0; jo<ncols_o; jo++) {
3429         col = cmap[*bj];
3430         if (col >= cstart) break;
3431         *ca++ = *ba++; bj++;
3432       }
3433       /* diagonal portion of A */
3434       ncols_d = ai[i+1] - ai[i];
3435       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3436       /* off-diagonal portion of A */
3437       for (j=jo; j<ncols_o; j++) {
3438         *ca++ = *ba++; bj++;
3439       }
3440     }
3441   } else {
3442     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3443   }
3444 
3445   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3446   PetscFunctionReturn(0);
3447 }
3448 
3449 static PetscEvent logkey_getlocalmatcondensed = 0;
3450 #undef __FUNCT__
3451 #define __FUNCT__ "MatGetLocalMatCondensed"
3452 /*@C
3453      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking the local rows of A and only its NON-ZERO columns
3454 
3455     Not Collective
3456 
3457    Input Parameters:
3458 +    A - the matrix
3459 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3460 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3461 
3462    Output Parameter:
3463 .    A_loc - the local sequential matrix generated
3464 
3465     Level: developer
3466 
3467 @*/
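/*
   Example usage (a minimal sketch; passing PETSC_NULL for row and col takes
   all local rows and the nonzero columns):

      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
*/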
3468 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3469 {
3470   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3471   PetscErrorCode    ierr;
3472   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3473   IS                isrowa,iscola;
3474   Mat               *aloc;
3475 
3476   PetscFunctionBegin;
3477   if (!logkey_getlocalmatcondensed) {
3478     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3479   }
3480   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3481   if (!row){
3482     start = a->rstart; end = a->rend;
3483     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3484   } else {
3485     isrowa = *row;
3486   }
3487   if (!col){
3488     start = a->cstart;
3489     cmap  = a->garray;
3490     nzA   = a->A->n;
3491     nzB   = a->B->n;
3492     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3493     ncols = 0;
3494     for (i=0; i<nzB; i++) {
3495       if (cmap[i] < start) idx[ncols++] = cmap[i];
3496       else break;
3497     }
3498     imark = i;
3499     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3500     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3501     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3502     ierr = PetscFree(idx);CHKERRQ(ierr);
3503   } else {
3504     iscola = *col;
3505   }
3506   if (scall != MAT_INITIAL_MATRIX){
3507     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3508     aloc[0] = *A_loc;
3509   }
3510   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3511   *A_loc = aloc[0];
3512   ierr = PetscFree(aloc);CHKERRQ(ierr);
3513   if (!row){
3514     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3515   }
3516   if (!col){
3517     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3518   }
3519   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3520   PetscFunctionReturn(0);
3521 }
3522 
3523 static PetscEvent logkey_GetBrowsOfAcols = 0;
3524 #undef __FUNCT__
3525 #define __FUNCT__ "MatGetBrowsOfAcols"
3526 /*@C
3527     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
3528 
3529     Collective on Mat
3530 
3531    Input Parameters:
3532 +    A,B - the matrices in mpiaij format
3533 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3534 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3535 
3536    Output Parameter:
3537 +    rowb, colb - index sets of rows and columns of B to extract
3538 .    brstart - row index of B_seq at which B's own local rows begin
3539 -    B_seq - the sequential matrix generated
3540 
3541     Level: developer
3542 
3543 @*/
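/*
   Example usage (a minimal sketch, assuming A and B are MPIAIJ matrices with
   compatible layouts; rowb and colb must be kept for the MAT_REUSE call):

      Mat      B_seq;
      IS       rowb,colb;
      PetscInt brstart;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ...  after B's numerical values change  ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
*/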
3544 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3545 {
3546   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3547   PetscErrorCode    ierr;
3548   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3549   IS                isrowb,iscolb;
3550   Mat               *bseq;
3551 
3552   PetscFunctionBegin;
3553   if (a->cstart != b->rstart || a->cend != b->rend){
3554     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3555   }
3556   if (!logkey_GetBrowsOfAcols) {
3557     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3558   }
3559   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3560 
3561   if (scall == MAT_INITIAL_MATRIX){
3562     start = a->cstart;
3563     cmap  = a->garray;
3564     nzA   = a->A->n;
3565     nzB   = a->B->n;
3566     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3567     ncols = 0;
3568     for (i=0; i<nzB; i++) {  /* row < local row index */
3569       if (cmap[i] < start) idx[ncols++] = cmap[i];
3570       else break;
3571     }
3572     imark = i;
3573     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3574     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3575     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3576     ierr = PetscFree(idx);CHKERRQ(ierr);
3577     *brstart = imark;
3578     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3579   } else {
3580     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3581     isrowb = *rowb; iscolb = *colb;
3582     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3583     bseq[0] = *B_seq;
3584   }
3585   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3586   *B_seq = bseq[0];
3587   ierr = PetscFree(bseq);CHKERRQ(ierr);
3588   if (!rowb){
3589     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3590   } else {
3591     *rowb = isrowb;
3592   }
3593   if (!colb){
3594     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3595   } else {
3596     *colb = iscolb;
3597   }
3598   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3599   PetscFunctionReturn(0);
3600 }
3601 
3602 static PetscEvent logkey_GetBrowsOfAocols = 0;
3603 #undef __FUNCT__
3604 #define __FUNCT__ "MatGetBrowsOfAoCols"
3605 /*@C
3606     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to
3607     the nonzero columns of the OFF-DIAGONAL portion of the local part of A
3608 
3609     Collective on Mat
3610 
3611    Input Parameters:
3612 +    A,B - the matrices in mpiaij format
3613 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3614 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3615 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3616 
3617    Output Parameter:
3618 .    B_oth - the sequential matrix generated
3619 
3620     Level: developer
3621 
3622 @*/
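/*
   Example usage (a minimal sketch; startsj and bufa must be kept between the
   MAT_INITIAL and MAT_REUSE calls):

      Mat         B_oth;
      PetscInt    *startsj;
      PetscScalar *bufa;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ...  after B's numerical values change  ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
*/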
3623 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3624 {
3625   VecScatter_MPI_General *gen_to,*gen_from;
3626   PetscErrorCode         ierr;
3627   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3628   Mat_SeqAIJ             *b_oth;
3629   VecScatter             ctx=a->Mvctx;
3630   MPI_Comm               comm=ctx->comm;
3631   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3632   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3633   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3634   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3635   MPI_Request            *rwaits,*swaits;
3636   MPI_Status             *sstatus,rstatus;
3637   PetscInt               *cols;
3638   PetscScalar            *vals;
3639   PetscMPIInt            j;
3640 
3641   PetscFunctionBegin;
3642   if (a->cstart != b->rstart || a->cend != b->rend){
3643     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3644   }
3645   if (!logkey_GetBrowsOfAocols) {
3646     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3647   }
3648   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3649   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3650 
3651   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3652   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3653   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3654   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3655   nrecvs   = gen_from->n;
3656   nsends   = gen_to->n;
3657   rwaits   = gen_from->requests;
3658   swaits   = gen_to->requests;
3659   srow     = gen_to->indices;   /* local row index to be sent */
3660   rstarts  = gen_from->starts;
3661   sstarts  = gen_to->starts;
3662   rprocs   = gen_from->procs;
3663   sprocs   = gen_to->procs;
3664   sstatus  = gen_to->sstatus;
3665 
3666   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3667   if (scall == MAT_INITIAL_MATRIX){
3668     /* i-array */
3669     /*---------*/
3670     /*  post receives */
3671     for (i=0; i<nrecvs; i++){
3672       rowlen = (PetscInt*)rvalues + rstarts[i];
3673       nrows = rstarts[i+1]-rstarts[i];
3674       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3675     }
3676 
3677     /* pack the outgoing message */
3678     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3679     rstartsj = sstartsj + nsends +1;
3680     sstartsj[0] = 0;  rstartsj[0] = 0;
3681     len = 0; /* total length of j or a array to be sent */
3682     k = 0;
3683     for (i=0; i<nsends; i++){
3684       rowlen = (PetscInt*)svalues + sstarts[i];
3685       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3686       for (j=0; j<nrows; j++) {
3687         row = srow[k] + b->rowners[rank]; /* global row idx */
3688         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3689         len += rowlen[j];
3690         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3691         k++;
3692       }
3693       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3694       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3695     }
3696     /* recvs and sends of i-array are completed */
3697     i = nrecvs;
3698     while (i--) {
3699       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3700     }
3701     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3702     /* allocate buffers for sending j and a arrays */
3703     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3704     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3705 
3706     /* create i-array of B_oth */
3707     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3708     b_othi[0] = 0;
3709     len = 0; /* total length of j or a array to be received */
3710     k = 0;
3711     for (i=0; i<nrecvs; i++){
3712       rowlen = (PetscInt*)rvalues + rstarts[i];
3713       nrows = rstarts[i+1]-rstarts[i];
3714       for (j=0; j<nrows; j++) {
3715         b_othi[k+1] = b_othi[k] + rowlen[j];
3716         len += rowlen[j]; k++;
3717       }
3718       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3719     }
3720 
3721     /* allocate space for the j and a arrays of B_oth */
3722     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3723     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3724 
3725     /* j-array */
3726     /*---------*/
3727     /*  post receives of j-array */
3728     for (i=0; i<nrecvs; i++){
3729       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3730       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3731     }
3732     k = 0;
3733     for (i=0; i<nsends; i++){
3734       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3735       bufJ = bufj+sstartsj[i];
3736       for (j=0; j<nrows; j++) {
3737         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3738         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3739         for (l=0; l<ncols; l++){
3740           *bufJ++ = cols[l];
3741         }
3742         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3743       }
3744       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3745     }
3746 
3747     /* recvs and sends of j-array are completed */
3748     i = nrecvs;
3749     while (i--) {
3750       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3751     }
3752     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3753   } else if (scall == MAT_REUSE_MATRIX){
3754     sstartsj = *startsj;
3755     rstartsj = sstartsj + nsends +1;
3756     bufa     = *bufa_ptr;
3757     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3758     b_otha   = b_oth->a;
3759   } else {
3760     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3761   }
3762 
3763   /* a-array */
3764   /*---------*/
3765   /*  post receives of a-array */
3766   for (i=0; i<nrecvs; i++){
3767     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3768     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3769   }
3770   k = 0;
3771   for (i=0; i<nsends; i++){
3772     nrows = sstarts[i+1]-sstarts[i];
3773     bufA = bufa+sstartsj[i];
3774     for (j=0; j<nrows; j++) {
3775       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3776       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3777       for (l=0; l<ncols; l++){
3778         *bufA++ = vals[l];
3779       }
3780       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3781 
3782     }
3783     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3784   }
3785   /* recvs and sends of a-array are completed */
3786   i = nrecvs;
3787   while (i--) {
3788     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3789   }
3790   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3791 
3792   if (scall == MAT_INITIAL_MATRIX){
3793     /* put together the new matrix */
3794     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3795 
3796     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3797     /* Since these are PETSc arrays, change flags to free them as necessary. */
3798     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3799     b_oth->freedata = PETSC_TRUE;
3800     b_oth->nonew    = 0;
3801 
3802     ierr = PetscFree(bufj);CHKERRQ(ierr);
3803     if (!startsj || !bufa_ptr){
3804       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3805       ierr = PetscFree(bufa);CHKERRQ(ierr);
3806     } else {
3807       *startsj  = sstartsj;
3808       *bufa_ptr = bufa;
3809     }
3810   }
3811   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3812 
3813   PetscFunctionReturn(0);
3814 }
3815 
3816 #undef __FUNCT__
3817 #define __FUNCT__ "MatGetCommunicationStructs"
3818 /*@C
3819   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
3820 
3821   Not Collective
3822 
3823   Input Parameter:
3824 . A - The matrix in mpiaij format
3825 
3826   Output Parameters:
3827 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
3828 . colmap - A map from global column index to local index into lvec
3829 - multScatter - A scatter from the argument of a matrix-vector product to lvec
3830 
3831   Level: developer
3832 
3833 @*/
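/*
   Example usage (a minimal sketch; the type of colmap depends on whether
   PETSc was configured with PETSC_USE_CTABLE):

      Vec        lvec;
      VecScatter scat;
#if defined(PETSC_USE_CTABLE)
      PetscTable colmap;
#else
      PetscInt   *colmap;
#endif
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
*/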
3834 #if defined (PETSC_USE_CTABLE)
3835 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
3836 #else
3837 PetscErrorCode PETSCMAT_DLLEXPORT MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
3838 #endif
3839 {
3840   Mat_MPIAIJ *a;
3841 
3842   PetscFunctionBegin;
3843   PetscValidHeaderSpecific(A, MAT_COOKIE, 1);
3844   PetscValidPointer(lvec, 2);
3845   PetscValidPointer(colmap, 3);
3846   PetscValidPointer(multScatter, 4);
3847   a = (Mat_MPIAIJ *) A->data;
3848   if (lvec) *lvec = a->lvec;
3849   if (colmap) *colmap = a->colmap;
3850   if (multScatter) *multScatter = a->Mvctx;
3851   PetscFunctionReturn(0);
3852 }
3853 
3854 /*MC
3855    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3856 
3857    Options Database Keys:
3858 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3859 
3860   Level: beginner
3861 
3862 .seealso: MatCreateMPIAIJ
3863 M*/
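/*
   Example usage (a minimal sketch; d_nz and o_nz are illustrative
   preallocation estimates, not values prescribed by this file):

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);

   or use MatSetFromOptions(A) with the runtime option -mat_type mpiaij.
*/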
3864 
3865 EXTERN_C_BEGIN
3866 #undef __FUNCT__
3867 #define __FUNCT__ "MatCreate_MPIAIJ"
3868 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3869 {
3870   Mat_MPIAIJ     *b;
3871   PetscErrorCode ierr;
3872   PetscInt       i;
3873   PetscMPIInt    size;
3874 
3875   PetscFunctionBegin;
3876   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3877 
3878   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3879   B->data         = (void*)b;
3880   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3881   B->factor       = 0;
3882   B->bs           = 1;
3883   B->assembled    = PETSC_FALSE;
3884   B->mapping      = 0;
3885 
3886   B->insertmode      = NOT_SET_VALUES;
3887   b->size            = size;
3888   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3889 
3890   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3891   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3892 
3893   /* the information in the maps duplicates the information computed below, eventually
3894      we should remove the duplicate information that is not contained in the maps */
3895   ierr = PetscMapInitialize(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3896   ierr = PetscMapInitialize(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3897 
3898   /* build local table of row and column ownerships */
3899   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3900   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3901   b->cowners = b->rowners + b->size + 2;
3902   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3903   b->rowners[0] = 0;
3904   for (i=2; i<=b->size; i++) {
3905     b->rowners[i] += b->rowners[i-1];
3906   }
3907   b->rstart = b->rowners[b->rank];
3908   b->rend   = b->rowners[b->rank+1];
3909   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3910   b->cowners[0] = 0;
3911   for (i=2; i<=b->size; i++) {
3912     b->cowners[i] += b->cowners[i-1];
3913   }
3914   b->cstart = b->cowners[b->rank];
3915   b->cend   = b->cowners[b->rank+1];
3916 
3917   /* build cache for off array entries formed */
3918   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3919   b->donotstash  = PETSC_FALSE;
3920   b->colmap      = 0;
3921   b->garray      = 0;
3922   b->roworiented = PETSC_TRUE;
3923 
3924   /* stuff used for matrix vector multiply */
3925   b->lvec      = PETSC_NULL;
3926   b->Mvctx     = PETSC_NULL;
3927 
3928   /* stuff for MatGetRow() */
3929   b->rowindices   = 0;
3930   b->rowvalues    = 0;
3931   b->getrowactive = PETSC_FALSE;
3932 
3933   /* Explicitly create 2 MATSEQAIJ matrices. */
3934   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3935   ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr);
3936   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3937   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3938   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3939   ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr);
3940   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3941   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3942 
3943   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3944                                      "MatStoreValues_MPIAIJ",
3945                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3946   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3947                                      "MatRetrieveValues_MPIAIJ",
3948                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3949   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3950 				     "MatGetDiagonalBlock_MPIAIJ",
3951                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3952   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3953 				     "MatIsTranspose_MPIAIJ",
3954 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3955   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3956 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3957 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3958   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3959 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3960 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3961   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3962 				     "MatDiagonalScaleLocal_MPIAIJ",
3963 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3964   PetscFunctionReturn(0);
3965 }
3966 EXTERN_C_END
3967 
3968 /*
3969     Special version for direct calls from Fortran
3970 */
3971 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3972 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
3973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3974 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
3975 #endif
3976 
3977 /* Change these macros so they can be used in a void function */
3978 #undef CHKERRQ
3979 #define CHKERRQ(ierr) CHKERRABORT(mat->comm,ierr)
3980 #undef SETERRQ2
3981 #define SETERRQ2(ierr,b,c,d) CHKERRABORT(mat->comm,ierr)
3982 #undef SETERRQ
3983 #define SETERRQ(ierr,b) CHKERRABORT(mat->comm,ierr)
3984 
3985 EXTERN_C_BEGIN
3986 #undef __FUNCT__
3987 #define __FUNCT__ "matsetvaluesmpiaij_"
3988 void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv)
3989 {
3990   Mat            mat = *mmat;
3991   PetscInt       m = *mm, n = *mn;
3992   InsertMode     addv = *maddv;
3993   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3994   PetscScalar    value;
3995   PetscErrorCode ierr;
3996   MatPreallocated(mat);
3997   if (mat->insertmode == NOT_SET_VALUES) {
3998     mat->insertmode = addv;
3999   }
4000 #if defined(PETSC_USE_DEBUG)
4001   else if (mat->insertmode != addv) {
4002     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4003   }
4004 #endif
4005   {
4006   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
4007   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
4008   PetscTruth     roworiented = aij->roworiented;
4009 
4010   /* Some Variables required in the macro */
4011   Mat            A = aij->A;
4012   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
4013   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
4014   PetscScalar    *aa = a->a;
4015   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
4016   Mat            B = aij->B;
4017   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
4018   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
4019   PetscScalar    *ba = b->a;
4020 
4021   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
4022   PetscInt       nonew = a->nonew;
4023   PetscScalar    *ap1,*ap2;
4024 
4025   PetscFunctionBegin;
4026   for (i=0; i<m; i++) {
4027     if (im[i] < 0) continue;
4028 #if defined(PETSC_USE_DEBUG)
4029     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
4030 #endif
4031     if (im[i] >= rstart && im[i] < rend) {
4032       row      = im[i] - rstart;
4033       lastcol1 = -1;
4034       rp1      = aj + ai[row];
4035       ap1      = aa + ai[row];
4036       rmax1    = aimax[row];
4037       nrow1    = ailen[row];
4038       low1     = 0;
4039       high1    = nrow1;
4040       lastcol2 = -1;
4041       rp2      = bj + bi[row];
4042       ap2      = ba + bi[row];
4043       rmax2    = bimax[row];
4044       nrow2    = bilen[row];
4045       low2     = 0;
4046       high2    = nrow2;
4047 
4048       for (j=0; j<n; j++) {
4049         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
4050         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
4051         if (in[j] >= cstart && in[j] < cend){
4052           col = in[j] - cstart;
4053           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
4054         } else if (in[j] < 0) continue;
4055 #if defined(PETSC_USE_DEBUG)
4056         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
4057 #endif
4058         else {
4059           if (mat->was_assembled) {
4060             if (!aij->colmap) {
4061               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
4062             }
4063 #if defined (PETSC_USE_CTABLE)
4064             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4065 	    col--;
4066 #else
4067             col = aij->colmap[in[j]] - 1;
4068 #endif
4069             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
4070               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
4071               col =  in[j];
4072               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
4073               B = aij->B;
4074               b = (Mat_SeqAIJ*)B->data;
4075               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
4076               rp2      = bj + bi[row];
4077               ap2      = ba + bi[row];
4078               rmax2    = bimax[row];
4079               nrow2    = bilen[row];
4080               low2     = 0;
4081               high2    = nrow2;
4082               bm       = aij->B->m;
4083               ba = b->a;
4084             }
4085           } else col = in[j];
4086           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
4087         }
4088       }
4089     } else {
4090       if (!aij->donotstash) {
4091         if (roworiented) {
4092           if (ignorezeroentries && v[i*n] == 0.0) continue;
4093           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
4094         } else {
4095           if (ignorezeroentries && v[i] == 0.0) continue;
4096           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
4097         }
4098       }
4099     }
4100   }}
4101   PetscFunctionReturnVoid();
4102 }
4103 EXTERN_C_END
4104