xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision be29d3c6887e35e9e43a4f137c8657ac14f8b020)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it, it is not scalable (each processor
11 has an order N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
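/*
   Illustrative sketch (not part of the library): how the colmap built above is
   consulted later in this file.  Given a global column index gcol that falls
   outside this process's diagonal block, the local column index lcol in the
   off-diagonal block B is recovered as follows (gcol and lcol are hypothetical
   names):

#if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;
#else
       lcol = aij->colmap[gcol] - 1;
#endif

   Both variants store indices shifted by one, so a result of lcol == -1 means
   gcol is not (yet) present in the off-diagonal block.
*/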
35 
36 #define CHUNKSIZE   15
37 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
38 { \
39     if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
40     lastcol1 = col;\
41     while (high1-low1 > 5) { \
42       t = (low1+high1)/2; \
43       if (rp1[t] > col) high1 = t; \
44       else             low1  = t; \
45     } \
46       for (_i=low1; _i<high1; _i++) { \
47         if (rp1[_i] > col) break; \
48         if (rp1[_i] == col) { \
49           if (addv == ADD_VALUES) ap1[_i] += value;   \
50           else                    ap1[_i] = value; \
51           goto a_noinsert; \
52         } \
53       }  \
54       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
55       if (nonew == 1) goto a_noinsert; \
56       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
57       if (nrow1 >= rmax1) { \
58         /* there is no extra room in row, therefore enlarge */ \
59         PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
60         PetscScalar *new_a; \
61  \
62         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
63  \
64         /* malloc new storage space */ \
65         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(am+1)*sizeof(PetscInt); \
66         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
67         new_j   = (PetscInt*)(new_a + new_nz); \
68         new_i   = new_j + new_nz; \
69  \
70         /* copy over old data into new slots */ \
71         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
72         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
73         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow1)*sizeof(PetscInt));CHKERRQ(ierr); \
74         len = (new_nz - CHUNKSIZE - ai[row] - nrow1); \
75         ierr = PetscMemcpy(new_j+ai[row]+nrow1+CHUNKSIZE,aj+ai[row]+nrow1,len*sizeof(PetscInt));CHKERRQ(ierr); \
76         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow1)*sizeof(PetscScalar));CHKERRQ(ierr); \
77         ierr = PetscMemcpy(new_a+ai[row]+nrow1+CHUNKSIZE,aa+ai[row]+nrow1,len*sizeof(PetscScalar));CHKERRQ(ierr);  \
78         /* free up old matrix storage */ \
79  \
80         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
81         if (!a->singlemalloc) { \
82            ierr = PetscFree(a->i);CHKERRQ(ierr); \
83            ierr = PetscFree(a->j);CHKERRQ(ierr); \
84         } \
85         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
86         a->singlemalloc = PETSC_TRUE; \
87  \
88         rp1   = aj + ai[row]; ap1 = aa + ai[row]; \
89         rmax1 = aimax[row] = aimax[row] + CHUNKSIZE; \
90         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
91         a->maxnz += CHUNKSIZE; \
92         a->reallocs++; \
93       } \
94       N = nrow1++ - 1; a->nz++; \
95       /* shift up all the later entries in this row */ \
96       for (ii=N; ii>=_i; ii--) { \
97         rp1[ii+1] = rp1[ii]; \
98         ap1[ii+1] = ap1[ii]; \
99       } \
100       rp1[_i] = col;  \
101       ap1[_i] = value;  \
102       a_noinsert: ; \
103       ailen[row] = nrow1; \
104 }
105 
106 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
107 { \
108     if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
109     lastcol2 = col;\
110     while (high2-low2 > 5) { \
111       t = (low2+high2)/2; \
112       if (rp2[t] > col) high2 = t; \
113       else             low2  = t; \
114     } \
115        for (_i=low2; _i<high2; _i++) { \
116         if (rp2[_i] > col) break; \
117         if (rp2[_i] == col) { \
118           if (addv == ADD_VALUES) ap2[_i] += value;   \
119           else                    ap2[_i] = value; \
120           goto b_noinsert; \
121         } \
122       }  \
123       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
124       if (nonew == 1) goto b_noinsert; \
125       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
126       if (nrow2 >= rmax2) { \
127         /* there is no extra room in row, therefore enlarge */ \
128         PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
129         PetscScalar *new_a; \
130  \
131         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
132  \
133         /* malloc new storage space */ \
134         len     = new_nz*(sizeof(PetscInt)+sizeof(PetscScalar))+(bm+1)*sizeof(PetscInt); \
135         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
136         new_j   = (PetscInt*)(new_a + new_nz); \
137         new_i   = new_j + new_nz; \
138  \
139         /* copy over old data into new slots */ \
140         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
141         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
142         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow2)*sizeof(PetscInt));CHKERRQ(ierr); \
143         len = (new_nz - CHUNKSIZE - bi[row] - nrow2); \
144         ierr = PetscMemcpy(new_j+bi[row]+nrow2+CHUNKSIZE,bj+bi[row]+nrow2,len*sizeof(PetscInt));CHKERRQ(ierr); \
145         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow2)*sizeof(PetscScalar));CHKERRQ(ierr); \
146         ierr = PetscMemcpy(new_a+bi[row]+nrow2+CHUNKSIZE,ba+bi[row]+nrow2,len*sizeof(PetscScalar));CHKERRQ(ierr);  \
147         /* free up old matrix storage */ \
148  \
149         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
150         if (!b->singlemalloc) { \
151           ierr = PetscFree(b->i);CHKERRQ(ierr); \
152           ierr = PetscFree(b->j);CHKERRQ(ierr); \
153         } \
154         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
155         b->singlemalloc = PETSC_TRUE; \
156  \
157         rp2   = bj + bi[row]; ap2 = ba + bi[row]; \
158         rmax2 = bimax[row] = bimax[row] + CHUNKSIZE; \
159         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
160         b->maxnz += CHUNKSIZE; \
161         b->reallocs++; \
162       } \
163       N = nrow2++ - 1; b->nz++; \
164       /* shift up all the later entries in this row */ \
165       for (ii=N; ii>=_i; ii--) { \
166         rp2[ii+1] = rp2[ii]; \
167         ap2[ii+1] = ap2[ii]; \
168       } \
169       rp2[_i] = col;  \
170       ap2[_i] = value;  \
171       b_noinsert: ; \
172       bilen[row] = nrow2; \
173 }
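/*
   The two macros above share one insertion strategy, summarized here: a short
   binary search narrows the window in the row, a linear scan finds the column
   (overwriting or adding if it already exists), and if the row is full the CSR
   arrays are reallocated with CHUNKSIZE extra slots before the new entry is
   shifted into place.  In outline (illustrative pseudocode only, not compiled):

       while (high-low > 5) { t = (low+high)/2; if (rp[t] > col) high = t; else low = t; }
       for (_i=low; _i<high; _i++) if (rp[_i] == col) { set or add value; done; }
       if (nrow >= rmax) grow row storage by CHUNKSIZE;
       shift later entries up one slot; rp[_i] = col; ap[_i] = value;
*/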
174 
175 #undef __FUNCT__
176 #define __FUNCT__ "MatSetValues_MPIAIJ"
177 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
178 {
179   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
180   PetscScalar    value;
181   PetscErrorCode ierr;
182   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
183   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
184   PetscTruth     roworiented = aij->roworiented;
185 
186   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
187   Mat            A = aij->A;
188   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
189   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
190   PetscScalar    *aa = a->a;
191   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
192   Mat            B = aij->B;
193   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
194   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
195   PetscScalar    *ba = b->a;
196 
197   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
198   PetscInt       nonew = a->nonew;
199   PetscScalar    *ap1,*ap2;
200 
201   PetscFunctionBegin;
202   for (i=0; i<m; i++) {
203     if (im[i] < 0) continue;
204 #if defined(PETSC_USE_DEBUG)
205     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
206 #endif
207     if (im[i] >= rstart && im[i] < rend) {
208       row      = im[i] - rstart;
209       lastcol1 = -1;
210       rp1      = aj + ai[row];
211       ap1      = aa + ai[row];
212       rmax1    = aimax[row];
213       nrow1    = ailen[row];
214       low1     = 0;
215       high1    = nrow1;
216       lastcol2 = -1;
217       rp2      = bj + bi[row];
218       ap2      = ba + bi[row];
219       rmax2    = bimax[row];
220       nrow2    = bilen[row];
221       low2     = 0;
222       high2    = nrow2;
223 
224       for (j=0; j<n; j++) {
225         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
227         if (in[j] >= cstart && in[j] < cend){
228           col = in[j] - cstart;
229           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
230         } else if (in[j] < 0) continue;
231 #if defined(PETSC_USE_DEBUG)
232         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!aij->colmap) {
237               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
238             }
239 #if defined (PETSC_USE_CTABLE)
240             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
241             col--;
242 #else
243             col = aij->colmap[in[j]] - 1;
244 #endif
245             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
246               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
247               col =  in[j];
248               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
249               B = aij->B;
250               b = (Mat_SeqAIJ*)B->data;
251               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
252               rp2      = bj + bi[row];
253               ap2      = ba + bi[row];
254               rmax2    = bimax[row];
255               nrow2    = bilen[row];
256               low2     = 0;
257               high2    = nrow2;
258               ba = b->a;
259             }
260           } else col = in[j];
261           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
262         }
263       }
264     } else {
265       if (!aij->donotstash) {
266         if (roworiented) {
267           if (ignorezeroentries && v[i*n] == 0.0) continue;
268           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
269         } else {
270           if (ignorezeroentries && v[i] == 0.0) continue;
271           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
272         }
273       }
274     }
275   }
276   PetscFunctionReturn(0);
277 }
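/*
   Hedged usage sketch (not part of the library): a caller may pass any global
   row/column indices to MatSetValues(); rows owned by other processes are
   cached in mat->stash by the routine above and only communicated during
   assembly.  The names row, col and val below are hypothetical.

       PetscScalar val = 1.0;
       PetscInt    row,col;
       ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/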
278 
279 #undef __FUNCT__
280 #define __FUNCT__ "MatGetValues_MPIAIJ"
281 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
282 {
283   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
284   PetscErrorCode ierr;
285   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
286   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
287 
288   PetscFunctionBegin;
289   for (i=0; i<m; i++) {
290     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
291     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
292     if (idxm[i] >= rstart && idxm[i] < rend) {
293       row = idxm[i] - rstart;
294       for (j=0; j<n; j++) {
295         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
296         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
297         if (idxn[j] >= cstart && idxn[j] < cend){
298           col = idxn[j] - cstart;
299           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
300         } else {
301           if (!aij->colmap) {
302             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
303           }
304 #if defined (PETSC_USE_CTABLE)
305           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
306           col--;
307 #else
308           col = aij->colmap[idxn[j]] - 1;
309 #endif
310           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
311           else {
312             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
313           }
314         }
315       }
316     } else {
317       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
318     }
319   }
320   PetscFunctionReturn(0);
321 }
322 
323 #undef __FUNCT__
324 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
325 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
326 {
327   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
328   PetscErrorCode ierr;
329   PetscInt       nstash,reallocs;
330   InsertMode     addv;
331 
332   PetscFunctionBegin;
333   if (aij->donotstash) {
334     PetscFunctionReturn(0);
335   }
336 
337   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
338   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
339   if (addv == (ADD_VALUES|INSERT_VALUES)) {
340     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
341   }
342   mat->insertmode = addv; /* in case this processor had no cache */
343 
344   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
345   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
346   ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
352 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
353 {
354   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
355   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
356   PetscErrorCode ierr;
357   PetscMPIInt    n;
358   PetscInt       i,j,rstart,ncols,flg;
359   PetscInt       *row,*col,other_disassembled;
360   PetscScalar    *val;
361   InsertMode     addv = mat->insertmode;
362 
363   PetscFunctionBegin;
364   if (!aij->donotstash) {
365     while (1) {
366       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
367       if (!flg) break;
368 
369       for (i=0; i<n;) {
370         /* Now identify the consecutive vals belonging to the same row */
371         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
372         if (j < n) ncols = j-i;
373         else       ncols = n-i;
374         /* Now assemble all these values with a single function call */
375         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
376         i = j;
377       }
378     }
379     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
380   }
381   a->compressedrow.use     = PETSC_FALSE;
382   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
383   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
384 
385   /* determine if any processor has disassembled; if so we must
386      also disassemble ourselves, so that we may reassemble. */
387   /*
388      if the nonzero structure of submatrix B cannot change then we know that
389      no processor disassembled, thus we can skip this step
390   */
391   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
392     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
393     if (mat->was_assembled && !other_disassembled) {
394       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
395     }
396   }
397   /* re-fetch b because aij->B may have been changed by MatSetValues() or DisAssemble() */
398   b    = (Mat_SeqAIJ *)aij->B->data;
399 
400   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
401     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
402   }
403   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
404   b->compressedrow.use = PETSC_TRUE;
405   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
406   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
407 
408   if (aij->rowvalues) {
409     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
410     aij->rowvalues = 0;
411   }
412 
413   /* used by MatAXPY() */
414   a->xtoy = 0; b->xtoy = 0;
415   a->XtoY = 0; b->XtoY = 0;
416 
417   PetscFunctionReturn(0);
418 }
419 
420 #undef __FUNCT__
421 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
422 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
423 {
424   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
425   PetscErrorCode ierr;
426 
427   PetscFunctionBegin;
428   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
429   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatZeroRows_MPIAIJ"
435 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
436 {
437   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
438   PetscErrorCode ierr;
439   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
440   PetscInt       i,N,*rows,*owners = l->rowners;
441   PetscInt       *nprocs,j,idx,nsends,row;
442   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
443   PetscInt       *rvalues,count,base,slen,*source;
444   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
445   MPI_Comm       comm = A->comm;
446   MPI_Request    *send_waits,*recv_waits;
447   MPI_Status     recv_status,*send_status;
448   IS             istmp;
449 #if defined(PETSC_DEBUG)
450   PetscTruth     found = PETSC_FALSE;
451 #endif
452 
453   PetscFunctionBegin;
454   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
455   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
456 
457   /*  first count number of contributors to each processor */
458   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
459   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
460   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
461   j = 0;
462   for (i=0; i<N; i++) {
463     if (lastidx > (idx = rows[i])) j = 0;
464     lastidx = idx;
465     for (; j<size; j++) {
466       if (idx >= owners[j] && idx < owners[j+1]) {
467         nprocs[2*j]++;
468         nprocs[2*j+1] = 1;
469         owner[i] = j;
470 #if defined(PETSC_DEBUG)
471         found = PETSC_TRUE;
472 #endif
473         break;
474       }
475     }
476 #if defined(PETSC_DEBUG)
477     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
478     found = PETSC_FALSE;
479 #endif
480   }
481   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
482 
483   /* inform other processors of number of messages and max length*/
484   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
485 
486   /* post receives:   */
487   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
488   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
489   for (i=0; i<nrecvs; i++) {
490     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
491   }
492 
493   /* do sends:
494       1) starts[i] gives the starting index in svalues for stuff going to
495          the ith processor
496   */
497   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
498   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
499   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
500   starts[0] = 0;
501   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
502   for (i=0; i<N; i++) {
503     svalues[starts[owner[i]]++] = rows[i];
504   }
505   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
506 
507   starts[0] = 0;
508   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
509   count = 0;
510   for (i=0; i<size; i++) {
511     if (nprocs[2*i+1]) {
512       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
513     }
514   }
515   ierr = PetscFree(starts);CHKERRQ(ierr);
516 
517   base = owners[rank];
518 
519   /*  wait on receives */
520   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
521   source = lens + nrecvs;
522   count  = nrecvs; slen = 0;
523   while (count) {
524     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
525     /* unpack receives into our local space */
526     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
527     source[imdex]  = recv_status.MPI_SOURCE;
528     lens[imdex]    = n;
529     slen          += n;
530     count--;
531   }
532   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
533 
534   /* move the received row indices into the local list lrows */
535   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
536   count = 0;
537   for (i=0; i<nrecvs; i++) {
538     values = rvalues + i*nmax;
539     for (j=0; j<lens[i]; j++) {
540       lrows[count++] = values[j] - base;
541     }
542   }
543   ierr = PetscFree(rvalues);CHKERRQ(ierr);
544   ierr = PetscFree(lens);CHKERRQ(ierr);
545   ierr = PetscFree(owner);CHKERRQ(ierr);
546   ierr = PetscFree(nprocs);CHKERRQ(ierr);
547 
548   /* actually zap the local rows */
549   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
550   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
551 
552   /*
553         Zero the required rows. If the "diagonal block" of the matrix
554      is square and the user wishes to set the diagonal we use separate
555      code so that MatSetValues() is not called for each diagonal entry,
556      which would allocate new memory, causing many mallocs and slowing things down.
557 
558        Contributed by: Mathew Knepley
559   */
560   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
561   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
562   if (diag && (l->A->M == l->A->N)) {
563     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
564   } else if (diag) {
565     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
566     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
567       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
568 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
569     }
570     for (i = 0; i < slen; i++) {
571       row  = lrows[i] + rstart;
572       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
573     }
574     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
575     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
576   } else {
577     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
578   }
579   ierr = ISDestroy(istmp);CHKERRQ(ierr);
580   ierr = PetscFree(lrows);CHKERRQ(ierr);
581 
582   /* wait on sends */
583   if (nsends) {
584     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
585     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
586     ierr = PetscFree(send_status);CHKERRQ(ierr);
587   }
588   ierr = PetscFree(send_waits);CHKERRQ(ierr);
589   ierr = PetscFree(svalues);CHKERRQ(ierr);
590 
591   PetscFunctionReturn(0);
592 }
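/*
   Hedged usage sketch (not part of the library): with this generation of the
   interface the rows to zero are supplied as an IS of global row indices plus
   an optional pointer to a diagonal value.  The names nlocal, rows and is are
   hypothetical.

       IS          is;
       PetscScalar one = 1.0;
       ierr = ISCreateGeneral(PETSC_COMM_SELF,nlocal,rows,&is);CHKERRQ(ierr);
       ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
       ierr = ISDestroy(is);CHKERRQ(ierr);

   Passing PETSC_NULL instead of &one leaves the zeroed rows without a diagonal entry.
*/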
593 
594 #undef __FUNCT__
595 #define __FUNCT__ "MatMult_MPIAIJ"
596 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
597 {
598   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
599   PetscErrorCode ierr;
600   PetscInt       nt;
601 
602   PetscFunctionBegin;
603   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
604   if (nt != A->n) {
605     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
606   }
607   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
608   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
609   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
610   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
611   PetscFunctionReturn(0);
612 }
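/*
   For reference, the multiply above computes yy = A_d*xx + B_o*x_ghost, where
   A_d is the local diagonal block, B_o the off-diagonal block, and x_ghost the
   ghost values gathered into a->lvec; the scatter is started before the
   diagonal-block multiply so communication overlaps with local work.  A hedged
   calling sketch (x and y are hypothetical vectors with compatible layouts):

       ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/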
613 
614 #undef __FUNCT__
615 #define __FUNCT__ "MatMultAdd_MPIAIJ"
616 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
617 {
618   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
619   PetscErrorCode ierr;
620 
621   PetscFunctionBegin;
622   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
623   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
624   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
625   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
626   PetscFunctionReturn(0);
627 }
628 
629 #undef __FUNCT__
630 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
631 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
632 {
633   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
634   PetscErrorCode ierr;
635   PetscTruth     merged;
636 
637   PetscFunctionBegin;
638   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
639   /* do nondiagonal part */
640   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
641   if (!merged) {
642     /* send it on its way */
643     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644     /* do local part */
645     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
646     /* receive remote parts: note this assumes the values are not actually */
647     /* added into yy until the next line */
648     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
649   } else {
650     /* do local part */
651     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
652     /* send it on its way */
653     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
654     /* values were actually received in the Begin() but we still need to call this no-op */
655     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
656   }
657   PetscFunctionReturn(0);
658 }
659 
660 EXTERN_C_BEGIN
661 #undef __FUNCT__
662 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
663 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f)
664 {
665   MPI_Comm       comm;
666   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
667   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
668   IS             Me,Notme;
669   PetscErrorCode ierr;
670   PetscInt       M,N,first,last,*notme,i;
671   PetscMPIInt    size;
672 
673   PetscFunctionBegin;
674 
675   /* Easy test: symmetric diagonal block */
676   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
677   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
678   if (!*f) PetscFunctionReturn(0);
679   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
680   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
681   if (size == 1) PetscFunctionReturn(0);
682 
683   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
684   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
685   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
686   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
687   for (i=0; i<first; i++) notme[i] = i;
688   for (i=last; i<M; i++) notme[i-last+first] = i;
689   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
690   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
691   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
692   Aoff = Aoffs[0];
693   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
694   Boff = Boffs[0];
695   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
696   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
697   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
698   ierr = ISDestroy(Me);CHKERRQ(ierr);
699   ierr = ISDestroy(Notme);CHKERRQ(ierr);
700 
701   PetscFunctionReturn(0);
702 }
703 EXTERN_C_END
704 
705 #undef __FUNCT__
706 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
707 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
708 {
709   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
710   PetscErrorCode ierr;
711 
712   PetscFunctionBegin;
713   /* do nondiagonal part */
714   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
715   /* send it on its way */
716   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
717   /* do local part */
718   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
719   /* receive remote parts */
720   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
721   PetscFunctionReturn(0);
722 }
723 
724 /*
725   This only works correctly for square matrices where the subblock A->A is the
726    diagonal block
727 */
728 #undef __FUNCT__
729 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
730 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
731 {
732   PetscErrorCode ierr;
733   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
734 
735   PetscFunctionBegin;
736   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
737   if (a->rstart != a->cstart || a->rend != a->cend) {
738     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
739   }
740   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatScale_MPIAIJ"
746 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
747 {
748   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
753   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatDestroy_MPIAIJ"
759 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
760 {
761   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
762   PetscErrorCode ierr;
763 
764   PetscFunctionBegin;
765 #if defined(PETSC_USE_LOG)
766   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
767 #endif
768   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
769   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
770   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
771   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
772 #if defined (PETSC_USE_CTABLE)
773   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
774 #else
775   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
776 #endif
777   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
778   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
779   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
780   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
781   ierr = PetscFree(aij);CHKERRQ(ierr);
782 
783   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
784   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
785   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
786   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
787   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
790   PetscFunctionReturn(0);
791 }
792 
793 #undef __FUNCT__
794 #define __FUNCT__ "MatView_MPIAIJ_Binary"
795 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
796 {
797   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
798   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
799   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
800   PetscErrorCode    ierr;
801   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
802   int               fd;
803   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
804   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
805   PetscScalar       *column_values;
806 
807   PetscFunctionBegin;
808   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
809   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
810   nz   = A->nz + B->nz;
811   if (!rank) {
812     header[0] = MAT_FILE_COOKIE;
813     header[1] = mat->M;
814     header[2] = mat->N;
815     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
816     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
817     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
818     /* get largest number of rows any processor has */
819     rlen = mat->m;
820     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
821     for (i=1; i<size; i++) {
822       rlen = PetscMax(rlen,range[i+1] - range[i]);
823     }
824   } else {
825     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
826     rlen = mat->m;
827   }
828 
829   /* load up the local row counts */
830   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
831   for (i=0; i<mat->m; i++) {
832     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
833   }
834 
835   /* store the row lengths to the file */
836   if (!rank) {
837     MPI_Status status;
838     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
839     for (i=1; i<size; i++) {
840       rlen = range[i+1] - range[i];
841       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
842       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
843     }
844   } else {
845     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
846   }
847   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
848 
849   /* load up the local column indices */
850   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
851   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
852   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
853   cnt  = 0;
854   for (i=0; i<mat->m; i++) {
855     for (j=B->i[i]; j<B->i[i+1]; j++) {
856       if ( (col = garray[B->j[j]]) > cstart) break;
857       column_indices[cnt++] = col;
858     }
859     for (k=A->i[i]; k<A->i[i+1]; k++) {
860       column_indices[cnt++] = A->j[k] + cstart;
861     }
862     for (; j<B->i[i+1]; j++) {
863       column_indices[cnt++] = garray[B->j[j]];
864     }
865   }
866   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
867 
868   /* store the column indices to the file */
869   if (!rank) {
870     MPI_Status status;
871     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
872     for (i=1; i<size; i++) {
873       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
874       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
875       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
876       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
877     }
878   } else {
879     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
880     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
881   }
882   ierr = PetscFree(column_indices);CHKERRQ(ierr);
883 
884   /* load up the local column values */
885   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
886   cnt  = 0;
887   for (i=0; i<mat->m; i++) {
888     for (j=B->i[i]; j<B->i[i+1]; j++) {
889       if ( garray[B->j[j]] > cstart) break;
890       column_values[cnt++] = B->a[j];
891     }
892     for (k=A->i[i]; k<A->i[i+1]; k++) {
893       column_values[cnt++] = A->a[k];
894     }
895     for (; j<B->i[i+1]; j++) {
896       column_values[cnt++] = B->a[j];
897     }
898   }
899   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
900 
901   /* store the column values to the file */
902   if (!rank) {
903     MPI_Status status;
904     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
905     for (i=1; i<size; i++) {
906       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
907       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
908       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
909       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
910     }
911   } else {
912     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
913     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
914   }
915   ierr = PetscFree(column_values);CHKERRQ(ierr);
916   PetscFunctionReturn(0);
917 }
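/*
   For reference, the binary layout written above is, in order: an integer
   header {MAT_FILE_COOKIE, M, N, total nonzeros}, the per-row nonzero counts
   for all M rows, the global column indices of every nonzero (rows in
   ascending order, columns sorted within each row), and finally the matching
   values.  A hedged sketch of reading the header back (fd and header are
   hypothetical):

       PetscInt header[4];
       ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
       ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
*/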
918 
919 #undef __FUNCT__
920 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
921 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
922 {
923   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
924   PetscErrorCode    ierr;
925   PetscMPIInt       rank = aij->rank,size = aij->size;
926   PetscTruth        isdraw,iascii,isbinary;
927   PetscViewer       sviewer;
928   PetscViewerFormat format;
929 
930   PetscFunctionBegin;
931   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
932   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
933   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
934   if (iascii) {
935     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
936     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
937       MatInfo    info;
938       PetscTruth inodes;
939 
940       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
941       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
942       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
943       if (!inodes) {
944         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
945 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
946       } else {
947         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
948 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
949       }
950       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
951       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
952       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
953       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
954       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
955       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
956       PetscFunctionReturn(0);
957     } else if (format == PETSC_VIEWER_ASCII_INFO) {
958       PetscInt   inodecount,inodelimit,*inodes;
959       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
960       if (inodes) {
961         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
962       } else {
963         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
964       }
965       PetscFunctionReturn(0);
966     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
967       PetscFunctionReturn(0);
968     }
969   } else if (isbinary) {
970     if (size == 1) {
971       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
972       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
973     } else {
974       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
975     }
976     PetscFunctionReturn(0);
977   } else if (isdraw) {
978     PetscDraw  draw;
979     PetscTruth isnull;
980     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
981     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
982   }
983 
984   if (size == 1) {
985     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
986     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
987   } else {
988     /* assemble the entire matrix onto first processor. */
989     Mat         A;
990     Mat_SeqAIJ  *Aloc;
991     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
992     PetscScalar *a;
993 
994     if (!rank) {
995       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
996     } else {
997       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
998     }
999     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1000     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1001     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1002     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1003 
1004     /* copy over the A part */
1005     Aloc = (Mat_SeqAIJ*)aij->A->data;
1006     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1007     row = aij->rstart;
1008     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
1009     for (i=0; i<m; i++) {
1010       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1011       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1012     }
1013     aj = Aloc->j;
1014     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
1015 
1016     /* copy over the B part */
1017     Aloc = (Mat_SeqAIJ*)aij->B->data;
1018     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1019     row  = aij->rstart;
1020     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1021     ct   = cols;
1022     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
1023     for (i=0; i<m; i++) {
1024       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1025       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1026     }
1027     ierr = PetscFree(ct);CHKERRQ(ierr);
1028     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1029     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1030     /*
1031        Everyone has to participate in drawing the matrix since the graphics waits are
1032        synchronized across all processors that share the PetscDraw object
1033     */
1034     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1035     if (!rank) {
1036       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1037       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1038     }
1039     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1040     ierr = MatDestroy(A);CHKERRQ(ierr);
1041   }
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 #undef __FUNCT__
1046 #define __FUNCT__ "MatView_MPIAIJ"
1047 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1048 {
1049   PetscErrorCode ierr;
1050   PetscTruth     iascii,isdraw,issocket,isbinary;
1051 
1052   PetscFunctionBegin;
1053   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1054   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1055   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1056   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1057   if (iascii || isdraw || isbinary || issocket) {
1058     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1059   } else {
1060     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 
1066 
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatRelax_MPIAIJ"
1069 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1070 {
1071   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1072   PetscErrorCode ierr;
1073   Vec            bb1;
1074   PetscScalar    mone=-1.0;
1075 
1076   PetscFunctionBegin;
1077   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1078 
1079   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1080 
1081   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1082     if (flag & SOR_ZERO_INITIAL_GUESS) {
1083       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1084       its--;
1085     }
1086 
1087     while (its--) {
1088       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1089       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1090 
1091       /* update rhs: bb1 = bb - B*x */
1092       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1093       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1094 
1095       /* local sweep */
1096       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1097       CHKERRQ(ierr);
1098     }
1099   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1100     if (flag & SOR_ZERO_INITIAL_GUESS) {
1101       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1102       its--;
1103     }
1104     while (its--) {
1105       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1106       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1107 
1108       /* update rhs: bb1 = bb - B*x */
1109       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1110       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1111 
1112       /* local sweep */
1113       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1114       CHKERRQ(ierr);
1115     }
1116   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1117     if (flag & SOR_ZERO_INITIAL_GUESS) {
1118       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1119       its--;
1120     }
1121     while (its--) {
1122       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1123       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1124 
1125       /* update rhs: bb1 = bb - B*x */
1126       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1127       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1128 
1129       /* local sweep */
1130       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1131       CHKERRQ(ierr);
1132     }
1133   } else {
1134     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1135   }
1136 
1137   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
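/*
   For reference, each outer iteration above is a block-Jacobi style update:
   gather the ghost values of xx into mat->lvec, form the modified right hand
   side bb1 = bb - B*x_ghost (done by scaling lvec by -1 and calling the
   off-diagonal block's multadd), then run the requested local SOR sweep on the
   diagonal block with bb1.  No relaxation crosses process boundaries, hence
   only the SOR_LOCAL_* flags are supported.  A hedged call (A, b, x, its and
   lits are hypothetical):

       ierr = MatRelax(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP,0.0,its,lits,x);CHKERRQ(ierr);
*/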
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1143 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1144 {
1145   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1146   Mat            A = mat->A,B = mat->B;
1147   PetscErrorCode ierr;
1148   PetscReal      isend[5],irecv[5];
1149 
1150   PetscFunctionBegin;
1151   info->block_size     = 1.0;
1152   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1153   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1154   isend[3] = info->memory;  isend[4] = info->mallocs;
1155   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1156   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1157   isend[3] += info->memory;  isend[4] += info->mallocs;
1158   if (flag == MAT_LOCAL) {
1159     info->nz_used      = isend[0];
1160     info->nz_allocated = isend[1];
1161     info->nz_unneeded  = isend[2];
1162     info->memory       = isend[3];
1163     info->mallocs      = isend[4];
1164   } else if (flag == MAT_GLOBAL_MAX) {
1165     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1166     info->nz_used      = irecv[0];
1167     info->nz_allocated = irecv[1];
1168     info->nz_unneeded  = irecv[2];
1169     info->memory       = irecv[3];
1170     info->mallocs      = irecv[4];
1171   } else if (flag == MAT_GLOBAL_SUM) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   }
1179   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1180   info->fill_ratio_needed = 0;
1181   info->factor_mallocs    = 0;
1182   info->rows_global       = (double)matin->M;
1183   info->columns_global    = (double)matin->N;
1184   info->rows_local        = (double)matin->m;
1185   info->columns_local     = (double)matin->N;
1186 
1187   PetscFunctionReturn(0);
1188 }
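/*
   Hedged usage sketch (not part of the library): the MatInfoType argument
   selects whether the five accumulated statistics are reported per process,
   maximized, or summed over the communicator, as handled above.

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(A->comm,"nz used %g, mallocs %g\n",info.nz_used,info.mallocs);CHKERRQ(ierr);
*/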
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatSetOption_MPIAIJ"
1192 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1193 {
1194   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   switch (op) {
1199   case MAT_NO_NEW_NONZERO_LOCATIONS:
1200   case MAT_YES_NEW_NONZERO_LOCATIONS:
1201   case MAT_COLUMNS_UNSORTED:
1202   case MAT_COLUMNS_SORTED:
1203   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1204   case MAT_KEEP_ZEROED_ROWS:
1205   case MAT_NEW_NONZERO_LOCATION_ERR:
1206   case MAT_USE_INODES:
1207   case MAT_DO_NOT_USE_INODES:
1208   case MAT_IGNORE_ZERO_ENTRIES:
1209     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1210     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1211     break;
1212   case MAT_ROW_ORIENTED:
1213     a->roworiented = PETSC_TRUE;
1214     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1215     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1216     break;
1217   case MAT_ROWS_SORTED:
1218   case MAT_ROWS_UNSORTED:
1219   case MAT_YES_NEW_DIAGONALS:
1220     ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
1221     break;
1222   case MAT_COLUMN_ORIENTED:
1223     a->roworiented = PETSC_FALSE;
1224     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1225     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1226     break;
1227   case MAT_IGNORE_OFF_PROC_ENTRIES:
1228     a->donotstash = PETSC_TRUE;
1229     break;
1230   case MAT_NO_NEW_DIAGONALS:
1231     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1232   case MAT_SYMMETRIC:
1233   case MAT_STRUCTURALLY_SYMMETRIC:
1234   case MAT_HERMITIAN:
1235   case MAT_SYMMETRY_ETERNAL:
1236     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1237     break;
1238   case MAT_NOT_SYMMETRIC:
1239   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1240   case MAT_NOT_HERMITIAN:
1241   case MAT_NOT_SYMMETRY_ETERNAL:
1242     break;
1243   default:
1244     SETERRQ(PETSC_ERR_SUP,"unknown option");
1245   }
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatGetRow_MPIAIJ"
1251 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1252 {
1253   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1254   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1255   PetscErrorCode ierr;
1256   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1257   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1258   PetscInt       *cmap,*idx_p;
1259 
1260   PetscFunctionBegin;
1261   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1262   mat->getrowactive = PETSC_TRUE;
1263 
1264   if (!mat->rowvalues && (idx || v)) {
1265     /*
1266         allocate enough space to hold information from the longest row.
1267     */
1268     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1269     PetscInt     max = 1,tmp;
1270     for (i=0; i<matin->m; i++) {
1271       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1272       if (max < tmp) { max = tmp; }
1273     }
1274     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1275     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1276   }
1277 
1278   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1279   lrow = row - rstart;
1280 
1281   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1282   if (!v)   {pvA = 0; pvB = 0;}
1283   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1284   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1285   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1286   nztot = nzA + nzB;
1287 
1288   cmap  = mat->garray;
1289   if (v  || idx) {
1290     if (nztot) {
1291       /* Sort by increasing column numbers, assuming A and B already sorted */
1292       PetscInt imark = -1;
1293       if (v) {
1294         *v = v_p = mat->rowvalues;
1295         for (i=0; i<nzB; i++) {
1296           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1297           else break;
1298         }
1299         imark = i;
1300         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1301         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1302       }
1303       if (idx) {
1304         *idx = idx_p = mat->rowindices;
1305         if (imark > -1) {
1306           for (i=0; i<imark; i++) {
1307             idx_p[i] = cmap[cworkB[i]];
1308           }
1309         } else {
1310           for (i=0; i<nzB; i++) {
1311             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1312             else break;
1313           }
1314           imark = i;
1315         }
1316         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1317         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1318       }
1319     } else {
1320       if (idx) *idx = 0;
1321       if (v)   *v   = 0;
1322     }
1323   }
1324   *nz = nztot;
1325   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1326   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1327   PetscFunctionReturn(0);
1328 }
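/*
   Hedged usage sketch (not part of the library): a row obtained with
   MatGetRow() must be released with MatRestoreRow() before the next call,
   matching the getrowactive guard above.  The names row, ncols, cols and vals
   are hypothetical.

       PetscInt    ncols,*cols;
       PetscScalar *vals;
       ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/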
1329 
1330 #undef __FUNCT__
1331 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1332 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1333 {
1334   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1335 
1336   PetscFunctionBegin;
1337   if (!aij->getrowactive) {
1338     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1339   }
1340   aij->getrowactive = PETSC_FALSE;
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatNorm_MPIAIJ"
1346 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1347 {
1348   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1349   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1350   PetscErrorCode ierr;
1351   PetscInt       i,j,cstart = aij->cstart;
1352   PetscReal      sum = 0.0;
1353   PetscScalar    *v;
1354 
1355   PetscFunctionBegin;
1356   if (aij->size == 1) {
1357     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1358   } else {
1359     if (type == NORM_FROBENIUS) {
1360       v = amat->a;
1361       for (i=0; i<amat->nz; i++) {
1362 #if defined(PETSC_USE_COMPLEX)
1363         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1364 #else
1365         sum += (*v)*(*v); v++;
1366 #endif
1367       }
1368       v = bmat->a;
1369       for (i=0; i<bmat->nz; i++) {
1370 #if defined(PETSC_USE_COMPLEX)
1371         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1372 #else
1373         sum += (*v)*(*v); v++;
1374 #endif
1375       }
1376       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1377       *norm = sqrt(*norm);
1378     } else if (type == NORM_1) { /* max column norm */
1379       PetscReal *tmp,*tmp2;
1380       PetscInt    *jj,*garray = aij->garray;
1381       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1382       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1383       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1384       *norm = 0.0;
1385       v = amat->a; jj = amat->j;
1386       for (j=0; j<amat->nz; j++) {
1387         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1388       }
1389       v = bmat->a; jj = bmat->j;
1390       for (j=0; j<bmat->nz; j++) {
1391         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1392       }
1393       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1394       for (j=0; j<mat->N; j++) {
1395         if (tmp2[j] > *norm) *norm = tmp2[j];
1396       }
1397       ierr = PetscFree(tmp);CHKERRQ(ierr);
1398       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1399     } else if (type == NORM_INFINITY) { /* max row norm */
1400       PetscReal ntemp = 0.0;
1401       for (j=0; j<aij->A->m; j++) {
1402         v = amat->a + amat->i[j];
1403         sum = 0.0;
1404         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1405           sum += PetscAbsScalar(*v); v++;
1406         }
1407         v = bmat->a + bmat->i[j];
1408         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1409           sum += PetscAbsScalar(*v); v++;
1410         }
1411         if (sum > ntemp) ntemp = sum;
1412       }
1413       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1414     } else {
1415       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1416     }
1417   }
1418   PetscFunctionReturn(0);
1419 }
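
/*
   Summary of the assembly above (a is the local diagonal block, b the off-diagonal block):

     NORM_FROBENIUS:  norm = sqrt( sum over all processes of sum_{local nonzeros} |a_ij|^2 )
     NORM_1:          norm = max_j sum_i |a_ij|   (per-column sums reduced with MPI_SUM, then the max is taken)
     NORM_INFINITY:   norm = max_i sum_j |a_ij|   (per-row sums are local, reduced with MPI_MAX)

   Note that the NORM_1 branch allocates two work arrays of global length N, so it is not
   memory scalable for matrices with very many columns.
*/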
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatTranspose_MPIAIJ"
1423 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1424 {
1425   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1426   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1427   PetscErrorCode ierr;
1428   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1429   Mat            B;
1430   PetscScalar    *array;
1431 
1432   PetscFunctionBegin;
1433   if (!matout && M != N) {
1434     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1435   }
1436 
1437   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1438   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1439   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1440 
1441   /* copy over the A part */
1442   Aloc = (Mat_SeqAIJ*)a->A->data;
1443   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1444   row = a->rstart;
1445   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1446   for (i=0; i<m; i++) {
1447     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1448     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1449   }
1450   aj = Aloc->j;
1451   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1452 
1453   /* copy over the B part */
1454   Aloc = (Mat_SeqAIJ*)a->B->data;
1455   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1456   row  = a->rstart;
1457   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1458   ct   = cols;
1459   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1460   for (i=0; i<m; i++) {
1461     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1462     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1463   }
1464   ierr = PetscFree(ct);CHKERRQ(ierr);
1465   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1466   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1467   if (matout) {
1468     *matout = B;
1469   } else {
1470     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1477 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1478 {
1479   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1480   Mat            a = aij->A,b = aij->B;
1481   PetscErrorCode ierr;
1482   PetscInt       s1,s2,s3;
1483 
1484   PetscFunctionBegin;
1485   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1486   if (rr) {
1487     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1488     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1489     /* Overlap communication with computation. */
1490     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1491   }
1492   if (ll) {
1493     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1494     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1495     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1496   }
1497   /* scale the diagonal block */
1498   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1499 
1500   if (rr) {
1501     /* Do a scatter end and then right scale the off-diagonal block */
1502     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1503     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1504   }
1505 
1506   PetscFunctionReturn(0);
1507 }
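
/*
   For reference, MatDiagonalScale() computes mat = diag(ll)*mat*diag(rr).  The left scaling
   ll acts on local rows only, so both blocks can be scaled with it directly; the right
   scaling of the off-diagonal block b needs the ghost entries of rr, which is why rr is
   scattered into aij->lvec above and the scatter is overlapped with scaling the diagonal block.
*/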
1508 
1509 
1510 #undef __FUNCT__
1511 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1512 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1513 {
1514   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1515   PetscErrorCode ierr;
1516 
1517   PetscFunctionBegin;
1518   if (!a->rank) {
1519     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1526 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1527 {
1528   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1529   PetscErrorCode ierr;
1530 
1531   PetscFunctionBegin;
1532   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1533   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1534   PetscFunctionReturn(0);
1535 }
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1538 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1539 {
1540   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatEqual_MPIAIJ"
1550 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1551 {
1552   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1553   Mat            a,b,c,d;
1554   PetscTruth     flg;
1555   PetscErrorCode ierr;
1556 
1557   PetscFunctionBegin;
1558   a = matA->A; b = matA->B;
1559   c = matB->A; d = matB->B;
1560 
1561   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1562   if (flg) {
1563     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1564   }
1565   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1566   PetscFunctionReturn(0);
1567 }
1568 
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatCopy_MPIAIJ"
1571 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1572 {
1573   PetscErrorCode ierr;
1574   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1575   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1576 
1577   PetscFunctionBegin;
1578   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1579   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1580     /* Because of the column compression in the off-processor part of the matrix a->B,
1581        the number of columns in a->B and b->B may differ, so we cannot call MatCopy()
1582        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
1583        could be provided by first uncompressing the a->B matrices and then copying the
1584        submatrices. */
1585     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1586   } else {
1587     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1588     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1589   }
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1595 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1596 {
1597   PetscErrorCode ierr;
1598 
1599   PetscFunctionBegin;
1600   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 #include "petscblaslapack.h"
1605 #undef __FUNCT__
1606 #define __FUNCT__ "MatAXPY_MPIAIJ"
1607 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1608 {
1609   PetscErrorCode ierr;
1610   PetscInt       i;
1611   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1612   PetscBLASInt   bnz,one=1;
1613   Mat_SeqAIJ     *x,*y;
1614 
1615   PetscFunctionBegin;
1616   if (str == SAME_NONZERO_PATTERN) {
1617     x = (Mat_SeqAIJ *)xx->A->data;
1618     y = (Mat_SeqAIJ *)yy->A->data;
1619     bnz = (PetscBLASInt)x->nz;
1620     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1621     x = (Mat_SeqAIJ *)xx->B->data;
1622     y = (Mat_SeqAIJ *)yy->B->data;
1623     bnz = (PetscBLASInt)x->nz;
1624     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1625   } else if (str == SUBSET_NONZERO_PATTERN) {
1626     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1627 
1628     x = (Mat_SeqAIJ *)xx->B->data;
1629     y = (Mat_SeqAIJ *)yy->B->data;
1630     if (y->xtoy && y->XtoY != xx->B) {
1631       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1632       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1633     }
1634     if (!y->xtoy) { /* get xtoy */
1635       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1636       y->XtoY = xx->B;
1637     }
1638     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1639   } else {
1640     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1641   }
1642   PetscFunctionReturn(0);
1643 }
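
/*
   A small worked example of the SAME_NONZERO_PATTERN fast path above: if X and Y both store
   the nonzeros (0,0),(0,2),(1,1) in the same order, then Y = a*X + Y reduces to a single
   BLAS axpy over the raw value arrays,

     y->a[k] = (*a)*x->a[k] + y->a[k],   k = 0,...,nz-1,

   applied separately to the diagonal (A) and off-diagonal (B) blocks.  With
   SUBSET_NONZERO_PATTERN the map y->xtoy[k] translates position k in X into the matching
   position in Y instead.
*/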
1644 
1645 /* -------------------------------------------------------------------*/
1646 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1647        MatGetRow_MPIAIJ,
1648        MatRestoreRow_MPIAIJ,
1649        MatMult_MPIAIJ,
1650 /* 4*/ MatMultAdd_MPIAIJ,
1651        MatMultTranspose_MPIAIJ,
1652        MatMultTransposeAdd_MPIAIJ,
1653        0,
1654        0,
1655        0,
1656 /*10*/ 0,
1657        0,
1658        0,
1659        MatRelax_MPIAIJ,
1660        MatTranspose_MPIAIJ,
1661 /*15*/ MatGetInfo_MPIAIJ,
1662        MatEqual_MPIAIJ,
1663        MatGetDiagonal_MPIAIJ,
1664        MatDiagonalScale_MPIAIJ,
1665        MatNorm_MPIAIJ,
1666 /*20*/ MatAssemblyBegin_MPIAIJ,
1667        MatAssemblyEnd_MPIAIJ,
1668        0,
1669        MatSetOption_MPIAIJ,
1670        MatZeroEntries_MPIAIJ,
1671 /*25*/ MatZeroRows_MPIAIJ,
1672        0,
1673        0,
1674        0,
1675        0,
1676 /*30*/ MatSetUpPreallocation_MPIAIJ,
1677        0,
1678        0,
1679        0,
1680        0,
1681 /*35*/ MatDuplicate_MPIAIJ,
1682        0,
1683        0,
1684        0,
1685        0,
1686 /*40*/ MatAXPY_MPIAIJ,
1687        MatGetSubMatrices_MPIAIJ,
1688        MatIncreaseOverlap_MPIAIJ,
1689        MatGetValues_MPIAIJ,
1690        MatCopy_MPIAIJ,
1691 /*45*/ MatPrintHelp_MPIAIJ,
1692        MatScale_MPIAIJ,
1693        0,
1694        0,
1695        0,
1696 /*50*/ MatSetBlockSize_MPIAIJ,
1697        0,
1698        0,
1699        0,
1700        0,
1701 /*55*/ MatFDColoringCreate_MPIAIJ,
1702        0,
1703        MatSetUnfactored_MPIAIJ,
1704        0,
1705        0,
1706 /*60*/ MatGetSubMatrix_MPIAIJ,
1707        MatDestroy_MPIAIJ,
1708        MatView_MPIAIJ,
1709        MatGetPetscMaps_Petsc,
1710        0,
1711 /*65*/ 0,
1712        0,
1713        0,
1714        0,
1715        0,
1716 /*70*/ 0,
1717        0,
1718        MatSetColoring_MPIAIJ,
1719 #if defined(PETSC_HAVE_ADIC)
1720        MatSetValuesAdic_MPIAIJ,
1721 #else
1722        0,
1723 #endif
1724        MatSetValuesAdifor_MPIAIJ,
1725 /*75*/ 0,
1726        0,
1727        0,
1728        0,
1729        0,
1730 /*80*/ 0,
1731        0,
1732        0,
1733        0,
1734 /*84*/ MatLoad_MPIAIJ,
1735        0,
1736        0,
1737        0,
1738        0,
1739        0,
1740 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1741        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1742        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1743        MatPtAP_Basic,
1744        MatPtAPSymbolic_MPIAIJ,
1745 /*95*/ MatPtAPNumeric_MPIAIJ,
1746        0,
1747        0,
1748        0,
1749        0,
1750 /*100*/0,
1751        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1752        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1753 };
1754 
1755 /* ----------------------------------------------------------------------------------------*/
1756 
1757 EXTERN_C_BEGIN
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1760 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1761 {
1762   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1763   PetscErrorCode ierr;
1764 
1765   PetscFunctionBegin;
1766   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1767   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1768   PetscFunctionReturn(0);
1769 }
1770 EXTERN_C_END
1771 
1772 EXTERN_C_BEGIN
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1775 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1776 {
1777   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1778   PetscErrorCode ierr;
1779 
1780   PetscFunctionBegin;
1781   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1782   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 EXTERN_C_END
1786 
1787 #include "petscpc.h"
1788 EXTERN_C_BEGIN
1789 #undef __FUNCT__
1790 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1791 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1792 {
1793   Mat_MPIAIJ     *b;
1794   PetscErrorCode ierr;
1795   PetscInt       i;
1796 
1797   PetscFunctionBegin;
1798   B->preallocated = PETSC_TRUE;
1799   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1800   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1801   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1802   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1803   if (d_nnz) {
1804     for (i=0; i<B->m; i++) {
1805       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1806     }
1807   }
1808   if (o_nnz) {
1809     for (i=0; i<B->m; i++) {
1810       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1811     }
1812   }
1813   b = (Mat_MPIAIJ*)B->data;
1814   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1815   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1816 
1817   PetscFunctionReturn(0);
1818 }
1819 EXTERN_C_END
1820 
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1823 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1824 {
1825   Mat            mat;
1826   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1827   PetscErrorCode ierr;
1828 
1829   PetscFunctionBegin;
1830   *newmat       = 0;
1831   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1832   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1833   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1834   a    = (Mat_MPIAIJ*)mat->data;
1835 
1836   mat->factor       = matin->factor;
1837   mat->bs           = matin->bs;
1838   mat->assembled    = PETSC_TRUE;
1839   mat->insertmode   = NOT_SET_VALUES;
1840   mat->preallocated = PETSC_TRUE;
1841 
1842   a->rstart       = oldmat->rstart;
1843   a->rend         = oldmat->rend;
1844   a->cstart       = oldmat->cstart;
1845   a->cend         = oldmat->cend;
1846   a->size         = oldmat->size;
1847   a->rank         = oldmat->rank;
1848   a->donotstash   = oldmat->donotstash;
1849   a->roworiented  = oldmat->roworiented;
1850   a->rowindices   = 0;
1851   a->rowvalues    = 0;
1852   a->getrowactive = PETSC_FALSE;
1853 
1854   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1855   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1856   if (oldmat->colmap) {
1857 #if defined (PETSC_USE_CTABLE)
1858     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1859 #else
1860     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1861     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1862     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1863 #endif
1864   } else a->colmap = 0;
1865   if (oldmat->garray) {
1866     PetscInt len;
1867     len  = oldmat->B->n;
1868     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1869     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1870     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1871   } else a->garray = 0;
1872 
1873   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1874   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1875   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1876   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1877   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1878   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1879   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1880   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1881   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1882   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1883   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1884   *newmat = mat;
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 #include "petscsys.h"
1889 
1890 #undef __FUNCT__
1891 #define __FUNCT__ "MatLoad_MPIAIJ"
1892 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1893 {
1894   Mat            A;
1895   PetscScalar    *vals,*svals;
1896   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1897   MPI_Status     status;
1898   PetscErrorCode ierr;
1899   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1900   PetscInt       i,nz,j,rstart,rend,mmax;
1901   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1902   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1903   PetscInt       cend,cstart,n,*rowners;
1904   int            fd;
1905 
1906   PetscFunctionBegin;
1907   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1908   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1909   if (!rank) {
1910     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1911     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1912     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1913   }
1914 
1915   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1916   M = header[1]; N = header[2];
1917   /* determine ownership of all rows */
1918   m    = M/size + ((M % size) > rank);
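  /* Example: with M = 10 rows on size = 3 processes this gives m = 4,3,3 on ranks 0,1,2,
     since only ranks with rank < M % size receive one extra row. */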
1919   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1920   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1921 
1922   /* First process needs enough room for process with most rows */
1923   if (!rank) {
1924     mmax       = rowners[1];
1925     for (i=2; i<size; i++) {
1926       mmax = PetscMax(mmax,rowners[i]);
1927     }
1928   } else mmax = m;
1929 
1930   rowners[0] = 0;
1931   for (i=2; i<=size; i++) {
1932     mmax       = PetscMax(mmax,rowners[i]);
1933     rowners[i] += rowners[i-1];
1934   }
1935   rstart = rowners[rank];
1936   rend   = rowners[rank+1];
1937 
1938   /* distribute row lengths to all processors */
1939   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1940   if (!rank) {
1941     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1942     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1943     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1944     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1945     for (j=0; j<m; j++) {
1946       procsnz[0] += ourlens[j];
1947     }
1948     for (i=1; i<size; i++) {
1949       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1950       /* calculate the number of nonzeros on each processor */
1951       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1952         procsnz[i] += rowlengths[j];
1953       }
1954       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1955     }
1956     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1957   } else {
1958     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1959   }
1960 
1961   if (!rank) {
1962     /* determine max buffer needed and allocate it */
1963     maxnz = 0;
1964     for (i=0; i<size; i++) {
1965       maxnz = PetscMax(maxnz,procsnz[i]);
1966     }
1967     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1968 
1969     /* read in my part of the matrix column indices  */
1970     nz   = procsnz[0];
1971     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1972     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1973 
1974     /* read in everyone else's part and ship it off */
1975     for (i=1; i<size; i++) {
1976       nz   = procsnz[i];
1977       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1978       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1979     }
1980     ierr = PetscFree(cols);CHKERRQ(ierr);
1981   } else {
1982     /* determine buffer space needed for message */
1983     nz = 0;
1984     for (i=0; i<m; i++) {
1985       nz += ourlens[i];
1986     }
1987     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1988 
1989     /* receive message of column indices */
1990     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1991     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1992     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Number of column indices received does not match the row lengths; file may be corrupt");
1993   }
1994 
1995   /* determine column ownership if matrix is not square */
1996   if (N != M) {
1997     n      = N/size + ((N % size) > rank);
1998     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1999     cstart = cend - n;
2000   } else {
2001     cstart = rstart;
2002     cend   = rend;
2003     n      = cend - cstart;
2004   }
2005 
2006   /* loop over local rows, determining number of off diagonal entries */
2007   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2008   jj = 0;
2009   for (i=0; i<m; i++) {
2010     for (j=0; j<ourlens[i]; j++) {
2011       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2012       jj++;
2013     }
2014   }
2015 
2016   /* create our matrix */
2017   for (i=0; i<m; i++) {
2018     ourlens[i] -= offlens[i];
2019   }
2020   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2021   ierr = MatSetType(A,type);CHKERRQ(ierr);
2022   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2023 
2024   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2025   for (i=0; i<m; i++) {
2026     ourlens[i] += offlens[i];
2027   }
2028 
2029   if (!rank) {
2030     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2031 
2032     /* read in my part of the matrix numerical values  */
2033     nz   = procsnz[0];
2034     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2035 
2036     /* insert into matrix */
2037     jj      = rstart;
2038     smycols = mycols;
2039     svals   = vals;
2040     for (i=0; i<m; i++) {
2041       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2042       smycols += ourlens[i];
2043       svals   += ourlens[i];
2044       jj++;
2045     }
2046 
2047     /* read in other processors and ship out */
2048     for (i=1; i<size; i++) {
2049       nz   = procsnz[i];
2050       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2051       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2052     }
2053     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2054   } else {
2055     /* receive numeric values */
2056     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2057 
2058     /* receive message of values */
2059     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2060     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2061     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Number of values received does not match the number of column indices; file may be corrupt");
2062 
2063     /* insert into matrix */
2064     jj      = rstart;
2065     smycols = mycols;
2066     svals   = vals;
2067     for (i=0; i<m; i++) {
2068       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2069       smycols += ourlens[i];
2070       svals   += ourlens[i];
2071       jj++;
2072     }
2073   }
2074   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2075   ierr = PetscFree(vals);CHKERRQ(ierr);
2076   ierr = PetscFree(mycols);CHKERRQ(ierr);
2077   ierr = PetscFree(rowners);CHKERRQ(ierr);
2078 
2079   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2080   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2081   *newmat = A;
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2087 /*
2088     Not great since it makes two copies of the submatrix: first a local SeqAIJ copy on
2089   each process, and then the final result is built by concatenating those local matrices.
2090   Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
2091 */
2092 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2093 {
2094   PetscErrorCode ierr;
2095   PetscMPIInt    rank,size;
2096   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2097   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2098   Mat            *local,M,Mreuse;
2099   PetscScalar    *vwork,*aa;
2100   MPI_Comm       comm = mat->comm;
2101   Mat_SeqAIJ     *aij;
2102 
2103 
2104   PetscFunctionBegin;
2105   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2106   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2107 
2108   if (call ==  MAT_REUSE_MATRIX) {
2109     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2110     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not created by a previous call with MAT_INITIAL_MATRIX, cannot reuse");
2111     local = &Mreuse;
2112     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2113   } else {
2114     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2115     Mreuse = *local;
2116     ierr   = PetscFree(local);CHKERRQ(ierr);
2117   }
2118 
2119   /*
2120       m - number of local rows
2121       n - number of columns (same on all processors)
2122       rstart - first row in new global matrix generated
2123   */
2124   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2125   if (call == MAT_INITIAL_MATRIX) {
2126     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2127     ii  = aij->i;
2128     jj  = aij->j;
2129 
2130     /*
2131         Determine the number of non-zeros in the diagonal and off-diagonal
2132         portions of the matrix in order to do correct preallocation
2133     */
2134 
2135     /* first get start and end of "diagonal" columns */
2136     if (csize == PETSC_DECIDE) {
2137       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2138       if (mglobal == n) { /* square matrix */
2139         nlocal = m;
2140       } else {
2141         nlocal = n/size + ((n % size) > rank);
2142       }
2143     } else {
2144       nlocal = csize;
2145     }
2146     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2147     rstart = rend - nlocal;
2148     if (rank == size - 1 && rend != n) {
2149       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2150     }
2151 
2152     /* next, compute all the lengths */
2153     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2154     olens = dlens + m;
2155     for (i=0; i<m; i++) {
2156       jend = ii[i+1] - ii[i];
2157       olen = 0;
2158       dlen = 0;
2159       for (j=0; j<jend; j++) {
2160         if (*jj < rstart || *jj >= rend) olen++;
2161         else dlen++;
2162         jj++;
2163       }
2164       olens[i] = olen;
2165       dlens[i] = dlen;
2166     }
2167     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2168     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2169     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2170     ierr = PetscFree(dlens);CHKERRQ(ierr);
2171   } else {
2172     PetscInt ml,nl;
2173 
2174     M = *newmat;
2175     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2176     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2177     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2178     /*
2179          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2180        rather than the slower MatSetValues().
2181     */
2182     M->was_assembled = PETSC_TRUE;
2183     M->assembled     = PETSC_FALSE;
2184   }
2185   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2186   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2187   ii  = aij->i;
2188   jj  = aij->j;
2189   aa  = aij->a;
2190   for (i=0; i<m; i++) {
2191     row   = rstart + i;
2192     nz    = ii[i+1] - ii[i];
2193     cwork = jj;     jj += nz;
2194     vwork = aa;     aa += nz;
2195     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2196   }
2197 
2198   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2199   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2200   *newmat = M;
2201 
2202   /* save submatrix used in processor for next request */
2203   if (call ==  MAT_INITIAL_MATRIX) {
2204     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2205     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2206   }
2207 
2208   PetscFunctionReturn(0);
2209 }
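
/*
   Usage sketch, assuming the public MatGetSubMatrix() entry point takes the same argument
   list as this routine; isrow/iscol are index sets of the global rows and columns this
   process wants to own in the new parallel matrix:

     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);

   A later call with MAT_REUSE_MATRIX and the same sub refills the previously created
   submatrix, using the "SubMatrix" object composed on it above.
*/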
2210 
2211 EXTERN_C_BEGIN
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2214 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2215 {
2216   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2217   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2218   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2219   const PetscInt *JJ;
2220   PetscScalar    *values;
2221   PetscErrorCode ierr;
2222 
2223   PetscFunctionBegin;
2224 #if defined(PETSC_OPT_g)
2225   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2226 #endif
2227   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2228   o_nnz = d_nnz + m;
2229 
2230   for (i=0; i<m; i++) {
2231     nnz     = I[i+1]- I[i];
2232     JJ      = J + I[i];
2233     nnz_max = PetscMax(nnz_max,nnz);
2234 #if defined(PETSC_OPT_g)
2235     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2236 #endif
2237     for (j=0; j<nnz; j++) {
2238       if (*JJ >= cstart) break;
2239       JJ++;
2240     }
2241     d = 0;
2242     for (; j<nnz; j++) {
2243       if (*JJ++ >= cend) break;
2244       d++;
2245     }
2246     d_nnz[i] = d;
2247     o_nnz[i] = nnz - d;
2248   }
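  /* Example: with cstart = 3 and cend = 6, a row whose (sorted) global columns are
     {1,3,5,7} contributes d_nnz = 2 (columns 3 and 5) and o_nnz = 2 (columns 1 and 7). */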
2249   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2250   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2251 
2252   if (v) values = (PetscScalar*)v;
2253   else {
2254     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2255     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2256   }
2257 
2258   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2259   for (i=0; i<m; i++) {
2260     ii   = i + rstart;
2261     nnz  = I[i+1]- I[i];
2262     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2263   }
2264   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2265   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2266   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2267 
2268   if (!v) {
2269     ierr = PetscFree(values);CHKERRQ(ierr);
2270   }
2271   PetscFunctionReturn(0);
2272 }
2273 EXTERN_C_END
2274 
2275 #undef __FUNCT__
2276 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2277 /*@C
2278    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2279    (the default parallel PETSc format).
2280 
2281    Collective on MPI_Comm
2282 
2283    Input Parameters:
2284 +  A - the matrix
2285 .  i - the indices into j for the start of each local row (starts with zero)
2286 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2287 -  v - optional values in the matrix
2288 
2289    Level: developer
2290 
2291 .keywords: matrix, aij, compressed row, sparse, parallel
2292 
2293 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2294 @*/
2295 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2296 {
2297   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2298 
2299   PetscFunctionBegin;
2300   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2301   if (f) {
2302     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2303   }
2304   PetscFunctionReturn(0);
2305 }
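
/*
   Usage sketch for MatMPIAIJSetPreallocationCSR(): each process passes the CSR arrays for
   its own local rows, with global (and per-row sorted) column indices; v may be PETSC_NULL
   if only the nonzero pattern is being supplied.  For a process owning two local rows with
   global columns {0,5} and {1} (in a matrix with at least 6 global columns):

     PetscInt    i[] = {0,2,3};
     PetscInt    j[] = {0,5,1};
     PetscScalar v[] = {1.0,2.0,3.0};

     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/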
2306 
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2309 /*@C
2310    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2311    (the default parallel PETSc format).  For good matrix assembly performance
2312    the user should preallocate the matrix storage by setting the parameters
2313    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2314    performance can be increased by more than a factor of 50.
2315 
2316    Collective on MPI_Comm
2317 
2318    Input Parameters:
2319 +  A - the matrix
2320 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2321            (same value is used for all local rows)
2322 .  d_nnz - array containing the number of nonzeros in the various rows of the
2323            DIAGONAL portion of the local submatrix (possibly different for each row)
2324            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2325            The size of this array is equal to the number of local rows, i.e 'm'.
2326            You must leave room for the diagonal entry even if it is zero.
2327 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2328            submatrix (same value is used for all local rows).
2329 -  o_nnz - array containing the number of nonzeros in the various rows of the
2330            OFF-DIAGONAL portion of the local submatrix (possibly different for
2331            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2332            structure. The size of this array is equal to the number
2333            of local rows, i.e 'm'.
2334 
2335    If the *_nnz parameter is given then the *_nz parameter is ignored
2336 
2337    The AIJ format (also called the Yale sparse matrix format or
2338    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2339    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2340 
2341    The parallel matrix is partitioned such that the first m0 rows belong to
2342    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2343    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2344 
2345    The DIAGONAL portion of the local submatrix of a processor can be defined
2346    as the submatrix which is obtained by extracting the part corresponding
2347    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2348    first row that belongs to the processor, and r2 is the last row belonging
2349    to this processor. This is a square mxm matrix. The remaining portion
2350    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2351 
2352    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2353 
2354    Example usage:
2355 
2356    Consider the following 8x8 matrix with 34 non-zero values, that is
2357    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2358    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2359    as follows:
2360 
2361 .vb
2362             1  2  0  |  0  3  0  |  0  4
2363     Proc0   0  5  6  |  7  0  0  |  8  0
2364             9  0 10  | 11  0  0  | 12  0
2365     -------------------------------------
2366            13  0 14  | 15 16 17  |  0  0
2367     Proc1   0 18  0  | 19 20 21  |  0  0
2368             0  0  0  | 22 23  0  | 24  0
2369     -------------------------------------
2370     Proc2  25 26 27  |  0  0 28  | 29  0
2371            30  0  0  | 31 32 33  |  0 34
2372 .ve
2373 
2374    This can be represented as a collection of submatrices as:
2375 
2376 .vb
2377       A B C
2378       D E F
2379       G H I
2380 .ve
2381 
2382    Where the submatrices A,B,C are owned by proc0, D,E,F are
2383    owned by proc1, G,H,I are owned by proc2.
2384 
2385    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2386    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2387    The 'M','N' parameters are 8,8, and have the same values on all procs.
2388 
2389    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2390    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2391    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2392    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2393    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2394    matrix and [DF] as another SeqAIJ matrix.
2395 
2396    When d_nz, o_nz parameters are specified, d_nz storage elements are
2397    allocated for every row of the local diagonal submatrix, and o_nz
2398    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2399    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2400    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2401    In this case, the values of d_nz,o_nz are:
2402 .vb
2403      proc0 : d_nz = 2, o_nz = 2
2404      proc1 : d_nz = 3, o_nz = 2
2405      proc2 : d_nz = 1, o_nz = 4
2406 .ve
2407    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2408    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2409    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2410    34 values.
2411 
2412    When d_nnz, o_nnz parameters are specified, the storage is specified
2413    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2414    In the above case the values for d_nnz,o_nnz are:
2415 .vb
2416      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2417      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2418      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2419 .ve
2420    Here the space allocated is the sum of all the above values, i.e., 34, and
2421    hence the preallocation is exact.
2422 
2423    Level: intermediate
2424 
2425 .keywords: matrix, aij, compressed row, sparse, parallel
2426 
2427 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2428           MPIAIJ
2429 @*/
2430 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2431 {
2432   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2433 
2434   PetscFunctionBegin;
2435   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2436   if (f) {
2437     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2438   }
2439   PetscFunctionReturn(0);
2440 }
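
/*
   Usage sketch for the 8x8 example in the manual page above, as seen from proc0
   (3 local rows and 3 "diagonal" columns, d_nnz = [2,2,2], o_nnz = [2,2,2]):

     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

     ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/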
2441 
2442 #undef __FUNCT__
2443 #define __FUNCT__ "MatCreateMPIAIJ"
2444 /*@C
2445    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2446    (the default parallel PETSc format).  For good matrix assembly performance
2447    the user should preallocate the matrix storage by setting the parameters
2448    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2449    performance can be increased by more than a factor of 50.
2450 
2451    Collective on MPI_Comm
2452 
2453    Input Parameters:
2454 +  comm - MPI communicator
2455 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2456            This value should be the same as the local size used in creating the
2457            y vector for the matrix-vector product y = Ax.
2458 .  n - This value should be the same as the local size used in creating the
2459        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2460        it calculated if N is given). For square matrices n is almost always m.
2461 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2462 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2463 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2464            (same value is used for all local rows)
2465 .  d_nnz - array containing the number of nonzeros in the various rows of the
2466            DIAGONAL portion of the local submatrix (possibly different for each row)
2467            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2468            The size of this array is equal to the number of local rows, i.e 'm'.
2469            You must leave room for the diagonal entry even if it is zero.
2470 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2471            submatrix (same value is used for all local rows).
2472 -  o_nnz - array containing the number of nonzeros in the various rows of the
2473            OFF-DIAGONAL portion of the local submatrix (possibly different for
2474            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2475            structure. The size of this array is equal to the number
2476            of local rows, i.e 'm'.
2477 
2478    Output Parameter:
2479 .  A - the matrix
2480 
2481    Notes:
2482    If the *_nnz parameter is given then the *_nz parameter is ignored
2483 
2484    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2485    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2486    storage requirements for this matrix.
2487 
2488    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2489    processor then it must be used on all processors that share the object for
2490    that argument.
2491 
2492    The user MUST specify either the local or global matrix dimensions
2493    (possibly both).
2494 
2495    The parallel matrix is partitioned such that the first m0 rows belong to
2496    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2497    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2498 
2499    The DIAGONAL portion of the local submatrix of a processor can be defined
2500    as the submatrix which is obtained by extracting the part corresponding
2501    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2502    first row that belongs to the processor, and r2 is the last row belonging
2503    to this processor. This is a square mxm matrix. The remaining portion
2504    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2505 
2506    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2507 
2508    When calling this routine with a single process communicator, a matrix of
2509    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2510    type of communicator, use the construction mechanism:
2511      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2512 
2513    By default, this format uses inodes (identical nodes) when possible.
2514    We search for consecutive rows with the same nonzero structure, thereby
2515    reusing matrix information to achieve increased efficiency.
2516 
2517    Options Database Keys:
2518 +  -mat_no_inode  - Do not use inodes
2519 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2520 -  -mat_aij_oneindex - Internally use indexing starting at 1
2521         rather than 0.  Note that when calling MatSetValues(),
2522         the user still MUST index entries starting at 0!
2523 
2524 
2525    Example usage:
2526 
2527    Consider the following 8x8 matrix with 34 non-zero values, that is
2528    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2529    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2530    as follows:
2531 
2532 .vb
2533             1  2  0  |  0  3  0  |  0  4
2534     Proc0   0  5  6  |  7  0  0  |  8  0
2535             9  0 10  | 11  0  0  | 12  0
2536     -------------------------------------
2537            13  0 14  | 15 16 17  |  0  0
2538     Proc1   0 18  0  | 19 20 21  |  0  0
2539             0  0  0  | 22 23  0  | 24  0
2540     -------------------------------------
2541     Proc2  25 26 27  |  0  0 28  | 29  0
2542            30  0  0  | 31 32 33  |  0 34
2543 .ve
2544 
2545    This can be represented as a collection of submatrices as:
2546 
2547 .vb
2548       A B C
2549       D E F
2550       G H I
2551 .ve
2552 
2553    Where the submatrices A,B,C are owned by proc0, D,E,F are
2554    owned by proc1, G,H,I are owned by proc2.
2555 
2556    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2557    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2558    The 'M','N' parameters are 8,8, and have the same values on all procs.
2559 
2560    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2561    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2562    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2563    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2564    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2565    matrix and [DF] as another SeqAIJ matrix.
2566 
2567    When d_nz, o_nz parameters are specified, d_nz storage elements are
2568    allocated for every row of the local diagonal submatrix, and o_nz
2569    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2570    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2571    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2572    In this case, the values of d_nz,o_nz are:
2573 .vb
2574      proc0 : d_nz = 2, o_nz = 2
2575      proc1 : d_nz = 3, o_nz = 2
2576      proc2 : d_nz = 1, o_nz = 4
2577 .ve
2578    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2579    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2580    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2581    34 values.
2582 
2583    When d_nnz, o_nnz parameters are specified, the storage is specified
2584    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2585    In the above case the values for d_nnz,o_nnz are:
2586 .vb
2587      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2588      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2589      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2590 .ve
2591    Here the space allocated is the sum of all the above values, i.e., 34, and
2592    hence the preallocation is exact.
2593 
2594    Level: intermediate
2595 
2596 .keywords: matrix, aij, compressed row, sparse, parallel
2597 
2598 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2599           MPIAIJ
2600 @*/
2601 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2602 {
2603   PetscErrorCode ierr;
2604   PetscMPIInt    size;
2605 
2606   PetscFunctionBegin;
2607   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2608   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2609   if (size > 1) {
2610     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2611     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2612   } else {
2613     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2614     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2615   }
2616   PetscFunctionReturn(0);
2617 }
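
/*
   Usage sketch: the same 8x8 example created in a single call, here as seen from proc1
   (3 local rows, using the per-process constants d_nz = 3, o_nz = 2 from the manual page
   above instead of per-row arrays):

     ierr = MatCreateMPIAIJ(comm,3,3,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);

   On a communicator with a single process this returns a SEQAIJ matrix instead, as noted
   in the manual page.
*/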
2618 
2619 #undef __FUNCT__
2620 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2621 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2622 {
2623   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2624 
2625   PetscFunctionBegin;
2626   *Ad     = a->A;
2627   *Ao     = a->B;
2628   *colmap = a->garray;
2629   PetscFunctionReturn(0);
2630 }
2631 
2632 #undef __FUNCT__
2633 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2634 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2635 {
2636   PetscErrorCode ierr;
2637   PetscInt       i;
2638   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2639 
2640   PetscFunctionBegin;
2641   if (coloring->ctype == IS_COLORING_LOCAL) {
2642     ISColoringValue *allcolors,*colors;
2643     ISColoring      ocoloring;
2644 
2645     /* set coloring for diagonal portion */
2646     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2647 
2648     /* set coloring for off-diagonal portion */
2649     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2650     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2651     for (i=0; i<a->B->n; i++) {
2652       colors[i] = allcolors[a->garray[i]];
2653     }
2654     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2655     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2656     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2657     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2658   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2659     ISColoringValue *colors;
2660     PetscInt        *larray;
2661     ISColoring      ocoloring;
2662 
2663     /* set coloring for diagonal portion */
2664     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2665     for (i=0; i<a->A->n; i++) {
2666       larray[i] = i + a->cstart;
2667     }
2668     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2669     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2670     for (i=0; i<a->A->n; i++) {
2671       colors[i] = coloring->colors[larray[i]];
2672     }
2673     ierr = PetscFree(larray);CHKERRQ(ierr);
2674     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2675     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2676     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2677 
2678     /* set coloring for off-diagonal portion */
2679     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2680     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2681     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2682     for (i=0; i<a->B->n; i++) {
2683       colors[i] = coloring->colors[larray[i]];
2684     }
2685     ierr = PetscFree(larray);CHKERRQ(ierr);
2686     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2687     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2688     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2689   } else {
2690     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2691   }
2692 
2693   PetscFunctionReturn(0);
2694 }
2695 
2696 #if defined(PETSC_HAVE_ADIC)
2697 #undef __FUNCT__
2698 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2699 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2700 {
2701   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2702   PetscErrorCode ierr;
2703 
2704   PetscFunctionBegin;
2705   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2706   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2707   PetscFunctionReturn(0);
2708 }
2709 #endif
2710 
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2713 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2714 {
2715   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2716   PetscErrorCode ierr;
2717 
2718   PetscFunctionBegin;
2719   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2720   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2721   PetscFunctionReturn(0);
2722 }
2723 
2724 #undef __FUNCT__
2725 #define __FUNCT__ "MatMerge"
2726 /*@C
2727       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2728                  matrices from each processor
2729 
2730     Collective on MPI_Comm
2731 
2732    Input Parameters:
2733 +    comm - the communicator the parallel matrix will live on
2734 .    inmat - the input sequential matrix on each processor
2735 .    n - number of local columns (or PETSC_DECIDE)
2736 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2737 
2738    Output Parameter:
2739 .    outmat - the parallel matrix generated
2740 
2741     Level: advanced
2742 
2743    Notes: The number of columns of the matrix on EACH processor MUST be the same.
2744 
2745 @*/
2746 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2747 {
2748   PetscErrorCode ierr;
2749   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2750   PetscInt       *indx;
2751   PetscScalar    *values;
2752   PetscMap       columnmap,rowmap;
2753 
2754   PetscFunctionBegin;
2755   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2756   /*
2757   PetscMPIInt       rank;
2758   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2759   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2760   */
2761   if (scall == MAT_INITIAL_MATRIX){
2762     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2763     if (n == PETSC_DECIDE){
2764       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2765       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2766       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2767       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2768       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2769     }
2770 
2771     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2772     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2773     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2774     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2775     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2776 
2777     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2778     for (i=0;i<m;i++) {
2779       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2780       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2781       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2782     }
2783     /* This routine will ONLY return MPIAIJ type matrix */
2784     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2785     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2786     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2787     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2788 
2789   } else if (scall == MAT_REUSE_MATRIX){
2790     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2791   } else {
2792     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2793   }
2794 
2795   for (i=0;i<m;i++) {
2796     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2797     I    = i + rstart;
2798     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2799     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2800   }
2801   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2802   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2803   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2804 
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 #undef __FUNCT__
2809 #define __FUNCT__ "MatFileSplit"
2810 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2811 {
2812   PetscErrorCode    ierr;
2813   PetscMPIInt       rank;
2814   PetscInt          m,N,i,rstart,nnz;
2815   size_t            len;
2816   const PetscInt    *indx;
2817   PetscViewer       out;
2818   char              *name;
2819   Mat               B;
2820   const PetscScalar *values;
2821 
2822   PetscFunctionBegin;
2823   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2824   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2825   /* Should this be the type of the diagonal block of A? */
2826   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2827   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2828   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2829   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2830   for (i=0;i<m;i++) {
2831     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2832     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2833     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2834   }
2835   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2836   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2837 
2838   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2839   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2840   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for the ".<rank>" suffix */
2841   sprintf(name,"%s.%d",outfile,rank);
2842   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2843   ierr = PetscFree(name);CHKERRQ(ierr);
2844   ierr = MatView(B,out);CHKERRQ(ierr);
2845   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2846   ierr = MatDestroy(B);CHKERRQ(ierr);
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2851 #undef __FUNCT__
2852 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2853 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2854 {
2855   PetscErrorCode       ierr;
2856   Mat_Merge_SeqsToMPI  *merge;
2857   PetscObjectContainer container;
2858 
2859   PetscFunctionBegin;
2860   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2861   if (container) {
2862     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2863     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2864     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2865     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2866     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2867     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2868     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2869     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2870     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2871     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2872     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2873     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2874 
2875     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2876     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2877     ierr = PetscFree(merge);CHKERRQ(ierr); /* free merge only when it was actually retrieved from the container */
2878   }
2879 
2880   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2881   PetscFunctionReturn(0);
2882 }
2883 
2884 #include "src/mat/utils/freespace.h"
2885 #include "petscbt.h"
2886 static PetscEvent logkey_seqstompinum = 0;
2887 #undef __FUNCT__
2888 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2889 /*@C
2890       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2891                  matrices from each processor
2892 
2893     Collective on MPI_Comm
2894 
2895    Input Parameters:
2896 +    comm - the communicator the parallel matrix will live on
2897 .    seqmat - the input sequential matrix (one per process)
2898 .    m - number of local rows (or PETSC_DECIDE)
2899 .    n - number of local columns (or PETSC_DECIDE)
2900 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2901 
2902    Output Parameter:
2903 .    mpimat - the parallel matrix generated
2904 
2905     Level: advanced
2906 
2907    Notes:
2908      The dimensions of the sequential matrix on each process MUST be the same.
2909      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
2910      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2911 @*/
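/*
   A minimal usage sketch (illustrative only; seqA is an assumed per-process SeqAIJ
   matrix that every process has built with identical dimensions):

       Mat C;
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
       ... change the numerical values of seqA, keeping its nonzero pattern ...
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);

   This is equivalent to calling MatMerge_SeqsToMPISymbolic() once and then
   MatMerge_SeqsToMPINumeric() for each new set of values.
*/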
2912 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2913 {
2914   PetscErrorCode       ierr;
2915   MPI_Comm             comm=mpimat->comm;
2916   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2917   PetscMPIInt          size,rank,taga,*len_s;
2918   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2919   PetscInt             proc,m;
2920   PetscInt             **buf_ri,**buf_rj;
2921   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2922   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2923   MPI_Request          *s_waits,*r_waits;
2924   MPI_Status           *status;
2925   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2926   Mat_Merge_SeqsToMPI  *merge;
2927   PetscObjectContainer container;
2928 
2929   PetscFunctionBegin;
2930   if (!logkey_seqstompinum) {
2931     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2932   }
2933   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2934 
2935   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2936   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2937 
2938   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2939   if (container) {
2940     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2941   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have an attached MatMergeSeqsToMPI container");
2942   bi     = merge->bi;
2943   bj     = merge->bj;
2944   buf_ri = merge->buf_ri;
2945   buf_rj = merge->buf_rj;
2946 
2947   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2948   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2949   len_s  = merge->len_s;
2950 
2951   /* send and recv matrix values */
2952   /*-----------------------------*/
2953   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2954   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2955 
2956   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2957   for (proc=0,k=0; proc<size; proc++){
2958     if (!len_s[proc]) continue;
2959     i = owners[proc];
2960     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2961     k++;
2962   }
2963 
2964   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2965   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2966   ierr = PetscFree(status);CHKERRQ(ierr);
2967 
2968   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2969   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2970 
2971   /* insert mat values of mpimat */
2972   /*----------------------------*/
2973   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2974   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2975   nextrow = buf_ri_k + merge->nrecv;
2976   nextai  = nextrow + merge->nrecv;
2977 
2978   for (k=0; k<merge->nrecv; k++){
2979     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2980     nrows = *(buf_ri_k[k]);
2981     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2982     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
2983   }
2984 
2985   /* set values of ba */
2986   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2987   for (i=0; i<m; i++) {
2988     arow = owners[rank] + i;
2989     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2990     bnzi = bi[i+1] - bi[i];
2991     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2992 
2993     /* add local non-zero vals of this proc's seqmat into ba */
2994     anzi = ai[arow+1] - ai[arow];
2995     aj   = a->j + ai[arow];
2996     aa   = a->a + ai[arow];
2997     nextaj = 0;
2998     for (j=0; nextaj<anzi; j++){
2999       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3000         ba_i[j] += aa[nextaj++];
3001       }
3002     }
3003 
3004     /* add received vals into ba */
3005     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3006       /* i-th row */
3007       if (i == *nextrow[k]) {
3008         anzi = *(nextai[k]+1) - *nextai[k];
3009         aj   = buf_rj[k] + *(nextai[k]);
3010         aa   = abuf_r[k] + *(nextai[k]);
3011         nextaj = 0;
3012         for (j=0; nextaj<anzi; j++){
3013           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3014             ba_i[j] += aa[nextaj++];
3015           }
3016         }
3017         nextrow[k]++; nextai[k]++;
3018       }
3019     }
3020     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3021   }
3022   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3023   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3024 
3025   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3026   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3027   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3028   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3029   PetscFunctionReturn(0);
3030 }
3031 
3032 static PetscEvent logkey_seqstompisym = 0;
3033 #undef __FUNCT__
3034 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3035 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3036 {
3037   PetscErrorCode       ierr;
3038   Mat                  B_mpi;
3039   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3040   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3041   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3042   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3043   PetscInt             len,proc,*dnz,*onz;
3044   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3045   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3046   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3047   MPI_Status           *status;
3048   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3049   PetscBT              lnkbt;
3050   Mat_Merge_SeqsToMPI  *merge;
3051   PetscObjectContainer container;
3052 
3053   PetscFunctionBegin;
3054   if (!logkey_seqstompisym) {
3055     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3056   }
3057   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3058 
3059   /* make sure it is a PETSc comm */
3060   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3061   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3062   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3063 
3064   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3065   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3066 
3067   /* determine row ownership */
3068   /*---------------------------------------------------------*/
3069   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3070   if (m == PETSC_DECIDE) {
3071     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3072   } else {
3073     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3074   }
3075   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3076   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3077   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3078 
3079   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3080   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3081 
3082   /* determine the number of messages to send, their lengths */
3083   /*---------------------------------------------------------*/
3084   len_s  = merge->len_s;
3085 
3086   len = 0;  /* length of buf_si[] */
3087   merge->nsend = 0;
3088   for (proc=0; proc<size; proc++){
3089     len_si[proc] = 0;
3090     if (proc == rank){
3091       len_s[proc] = 0;
3092     } else {
3093       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3094       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3095     }
3096     if (len_s[proc]) {
3097       merge->nsend++;
3098       nrows = 0;
3099       for (i=owners[proc]; i<owners[proc+1]; i++){
3100         if (ai[i+1] > ai[i]) nrows++;
3101       }
3102       len_si[proc] = 2*(nrows+1);
3103       len += len_si[proc];
3104     }
3105   }
3106 
3107   /* determine the number and length of messages to receive for ij-structure */
3108   /*-------------------------------------------------------------------------*/
3109   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3110   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3111 
3112   /* post the Irecv of j-structure */
3113   /*-------------------------------*/
3114   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3115   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3116 
3117   /* post the Isend of j-structure */
3118   /*--------------------------------*/
3119   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3120   sj_waits = si_waits + merge->nsend;
3121 
3122   for (proc=0, k=0; proc<size; proc++){
3123     if (!len_s[proc]) continue;
3124     i = owners[proc];
3125     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3126     k++;
3127   }
3128 
3129   /* receives and sends of j-structure are complete */
3130   /*------------------------------------------------*/
3131   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3132   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3133 
3134   /* send and recv i-structure */
3135   /*---------------------------*/
3136   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3137   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3138 
3139   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3140   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3141   for (proc=0,k=0; proc<size; proc++){
3142     if (!len_s[proc]) continue;
3143     /* form outgoing message for i-structure:
3144          buf_si[0]:                 nrows to be sent
3145                [1:nrows]:           row index (local to the receiving process)
3146                [nrows+1:2*nrows+1]: i-structure index
3147     */
3148     /*-------------------------------------------*/
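    /* e.g. (assumed values for illustration): sending two nonempty rows whose local
       indices on the receiving process are 3 and 7, with 2 and 5 nonzeros respectively,
       gives buf_si = { 2,  3, 7,  0, 2, 7 }  (nrows, row indices, i-structure) */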
3149     nrows = len_si[proc]/2 - 1;
3150     buf_si_i    = buf_si + nrows+1;
3151     buf_si[0]   = nrows;
3152     buf_si_i[0] = 0;
3153     nrows = 0;
3154     for (i=owners[proc]; i<owners[proc+1]; i++){
3155       anzi = ai[i+1] - ai[i];
3156       if (anzi) {
3157         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3158         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3159         nrows++;
3160       }
3161     }
3162     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3163     k++;
3164     buf_si += len_si[proc];
3165   }
3166 
3167   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3168   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3169 
3170   ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
3171   for (i=0; i<merge->nrecv; i++){
3172     ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
3173   }
3174 
3175   ierr = PetscFree(len_si);CHKERRQ(ierr);
3176   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3177   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3178   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3179   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3180   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3181   ierr = PetscFree(status);CHKERRQ(ierr);
3182 
3183   /* compute a local seq matrix in each processor */
3184   /*----------------------------------------------*/
3185   /* allocate bi array and free space for accumulating nonzero column info */
3186   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3187   bi[0] = 0;
3188 
3189   /* create and initialize a linked list */
3190   nlnk = N+1;
3191   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3192 
3193   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3195   len  = ai[owners[rank+1]] - ai[owners[rank]];
3196   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3197   current_space = free_space;
3198 
3199   /* determine symbolic info for each local row */
3200   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3201   nextrow = buf_ri_k + merge->nrecv;
3202   nextai  = nextrow + merge->nrecv;
3203   for (k=0; k<merge->nrecv; k++){
3204     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3205     nrows = *buf_ri_k[k];
3206     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3207     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the next i-structure of k-th received i-structure  */
3208   }
3209 
3210   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3211   len = 0;
3212   for (i=0;i<m;i++) {
3213     bnzi   = 0;
3214     /* add local non-zero cols of this proc's seqmat into lnk */
3215     arow   = owners[rank] + i;
3216     anzi   = ai[arow+1] - ai[arow];
3217     aj     = a->j + ai[arow];
3218     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3219     bnzi += nlnk;
3220     /* add received col data into lnk */
3221     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3222       if (i == *nextrow[k]) { /* i-th row */
3223         anzi = *(nextai[k]+1) - *nextai[k];
3224         aj   = buf_rj[k] + *nextai[k];
3225         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3226         bnzi += nlnk;
3227         nextrow[k]++; nextai[k]++;
3228       }
3229     }
3230     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3231 
3232     /* if free space is not available, make more free space */
3233     if (current_space->local_remaining<bnzi) {
3234       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3235       nspacedouble++;
3236     }
3237     /* copy data into free space, then initialize lnk */
3238     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3239     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3240 
3241     current_space->array           += bnzi;
3242     current_space->local_used      += bnzi;
3243     current_space->local_remaining -= bnzi;
3244 
3245     bi[i+1] = bi[i] + bnzi;
3246   }
3247 
3248   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3249 
3250   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3251   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3252   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3253 
3254   /* create symbolic parallel matrix B_mpi */
3255   /*---------------------------------------*/
3256   if (n==PETSC_DECIDE) {
3257     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3258   } else {
3259     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3260   }
3261   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3262   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3263   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3264 
3265   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3266   B_mpi->assembled     = PETSC_FALSE;
3267   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3268   merge->bi            = bi;
3269   merge->bj            = bj;
3270   merge->buf_ri        = buf_ri;
3271   merge->buf_rj        = buf_rj;
3272   merge->coi           = PETSC_NULL;
3273   merge->coj           = PETSC_NULL;
3274   merge->owners_co     = PETSC_NULL;
3275 
3276   /* attach the supporting struct to B_mpi for reuse */
3277   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3278   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3279   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3280   *mpimat = B_mpi;
3281 
3282   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3283   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3284   PetscFunctionReturn(0);
3285 }
3286 
3287 static PetscEvent logkey_seqstompi = 0;
3288 #undef __FUNCT__
3289 #define __FUNCT__ "MatMerge_SeqsToMPI"
3290 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3291 {
3292   PetscErrorCode   ierr;
3293 
3294   PetscFunctionBegin;
3295   if (!logkey_seqstompi) {
3296     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3297   }
3298   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3299   if (scall == MAT_INITIAL_MATRIX){
3300     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3301   }
3302   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3303   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3304   PetscFunctionReturn(0);
3305 }
3306 static PetscEvent logkey_getlocalmat = 0;
3307 #undef __FUNCT__
3308 #define __FUNCT__ "MatGetLocalMat"
3309 /*@C
3310      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of A (both its diagonal and off-diagonal portions)
3311 
3312     Not Collective
3313 
3314    Input Parameters:
3315 +    A - the matrix
3316 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3317 
3318    Output Parameter:
3319 .    A_loc - the local sequential matrix generated
3320 
3321     Level: developer
3322 
3323 @*/
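/*
   A minimal usage sketch (A is assumed to be an assembled MPIAIJ matrix):

       Mat A_loc;
       ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
       ... use A_loc, e.g. with MatGetRow() ...
       ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);   (after A's values change)
       ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/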
3324 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3325 {
3326   PetscErrorCode  ierr;
3327   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3328   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3329   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3330   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3331   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3332   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3333 
3334   PetscFunctionBegin;
3335   if (!logkey_getlocalmat) {
3336     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3337   }
3338   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3339   if (scall == MAT_INITIAL_MATRIX){
3340     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3341     ci[0] = 0;
3342     for (i=0; i<am; i++){
3343       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3344     }
3345     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3346     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3347     k = 0;
3348     for (i=0; i<am; i++) {
3349       ncols_o = bi[i+1] - bi[i];
3350       ncols_d = ai[i+1] - ai[i];
3351       /* off-diagonal portion of A */
3352       for (jo=0; jo<ncols_o; jo++) {
3353         col = cmap[*bj];
3354         if (col >= cstart) break;
3355         cj[k]   = col; bj++;
3356         ca[k++] = *ba++;
3357       }
3358       /* diagonal portion of A */
3359       for (j=0; j<ncols_d; j++) {
3360         cj[k]   = cstart + *aj++;
3361         ca[k++] = *aa++;
3362       }
3363       /* off-diagonal portion of A */
3364       for (j=jo; j<ncols_o; j++) {
3365         cj[k]   = cmap[*bj++];
3366         ca[k++] = *ba++;
3367       }
3368     }
3369     /* put together the new matrix */
3370     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3371     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3372     /* Since these are PETSc arrays, change flags to free them as necessary. */
3373     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3374     mat->freedata = PETSC_TRUE;
3375     mat->nonew    = 0;
3376   } else if (scall == MAT_REUSE_MATRIX){
3377     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3378     ci = mat->i; cj = mat->j; ca = mat->a;
3379     for (i=0; i<am; i++) {
3380       /* off-diagonal portion of A */
3381       ncols_o = bi[i+1] - bi[i];
3382       for (jo=0; jo<ncols_o; jo++) {
3383         col = cmap[*bj];
3384         if (col >= cstart) break;
3385         *ca++ = *ba++; bj++;
3386       }
3387       /* diagonal portion of A */
3388       ncols_d = ai[i+1] - ai[i];
3389       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3390       /* off-diagonal portion of A */
3391       for (j=jo; j<ncols_o; j++) {
3392         *ca++ = *ba++; bj++;
3393       }
3394     }
3395   } else {
3396     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3397   }
3398 
3399   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3400   PetscFunctionReturn(0);
3401 }
3402 
3403 static PetscEvent logkey_getlocalmatcondensed = 0;
3404 #undef __FUNCT__
3405 #define __FUNCT__ "MatGetLocalMatCondensed"
3406 /*@C
3407      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows of A, keeping only the NON-ZERO columns
3408 
3409     Not Collective
3410 
3411    Input Parameters:
3412 +    A - the matrix
3413 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3414 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3415 
3416    Output Parameter:
3417 .    A_loc - the local sequential matrix generated
3418 
3419     Level: developer
3420 
3421 @*/
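/*
   A minimal usage sketch (A is assumed to be an assembled MPIAIJ matrix; passing
   PETSC_NULL for row and col selects all local rows and all nonzero columns):

       Mat A_loc;
       ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
       ...
       ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/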
3422 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3423 {
3424   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3425   PetscErrorCode    ierr;
3426   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3427   IS                isrowa,iscola;
3428   Mat               *aloc;
3429 
3430   PetscFunctionBegin;
3431   if (!logkey_getlocalmatcondensed) {
3432     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3433   }
3434   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3435   if (!row){
3436     start = a->rstart; end = a->rend;
3437     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3438   } else {
3439     isrowa = *row;
3440   }
3441   if (!col){
3442     start = a->cstart;
3443     cmap  = a->garray;
3444     nzA   = a->A->n;
3445     nzB   = a->B->n;
3446     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3447     ncols = 0;
3448     for (i=0; i<nzB; i++) {
3449       if (cmap[i] < start) idx[ncols++] = cmap[i];
3450       else break;
3451     }
3452     imark = i;
3453     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3454     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3455     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3456     ierr = PetscFree(idx);CHKERRQ(ierr);
3457   } else {
3458     iscola = *col;
3459   }
3460   if (scall != MAT_INITIAL_MATRIX){
3461     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3462     aloc[0] = *A_loc;
3463   }
3464   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3465   *A_loc = aloc[0];
3466   ierr = PetscFree(aloc);CHKERRQ(ierr);
3467   if (!row){
3468     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3469   }
3470   if (!col){
3471     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3472   }
3473   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3474   PetscFunctionReturn(0);
3475 }
3476 
3477 static PetscEvent logkey_GetBrowsOfAcols = 0;
3478 #undef __FUNCT__
3479 #define __FUNCT__ "MatGetBrowsOfAcols"
3480 /*@C
3481     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local A
3482 
3483     Collective on Mat
3484 
3485    Input Parameters:
3486 +    A,B - the matrices in mpiaij format
3487 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3488 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3489 
3490    Output Parameters:
3491 +    rowb, colb - index sets of rows and columns of B to extract
3492 .    brstart - index of the first row of B_seq that comes from B's own local rows
3493 -    B_seq - the sequential matrix generated
3494 
3495     Level: developer
3496 
3497 @*/
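/*
   A minimal usage sketch (A and B are assumed MPIAIJ matrices whose layouts are
   compatible, i.e. A's column ownership matches B's row ownership):

       Mat      B_seq;
       IS       rowb,colb;
       PetscInt brstart;
       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
       ... after B's values change, reuse the same pattern ...
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
       ierr = ISDestroy(rowb);CHKERRQ(ierr);
       ierr = ISDestroy(colb);CHKERRQ(ierr);
       ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/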
3498 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3499 {
3500   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3501   PetscErrorCode    ierr;
3502   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3503   IS                isrowb,iscolb;
3504   Mat               *bseq;
3505 
3506   PetscFunctionBegin;
3507   if (a->cstart != b->rstart || a->cend != b->rend){
3508     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3509   }
3510   if (!logkey_GetBrowsOfAcols) {
3511     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3512   }
3513   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3514 
3515   if (scall == MAT_INITIAL_MATRIX){
3516     start = a->cstart;
3517     cmap  = a->garray;
3518     nzA   = a->A->n;
3519     nzB   = a->B->n;
3520     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3521     ncols = 0;
3522     for (i=0; i<nzB; i++) {  /* row < local row index */
3523       if (cmap[i] < start) idx[ncols++] = cmap[i];
3524       else break;
3525     }
3526     imark = i;
3527     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3528     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3529     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3530     ierr = PetscFree(idx);CHKERRQ(ierr);
3531     *brstart = imark;
3532     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3533   } else {
3534     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3535     isrowb = *rowb; iscolb = *colb;
3536     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3537     bseq[0] = *B_seq;
3538   }
3539   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3540   *B_seq = bseq[0];
3541   ierr = PetscFree(bseq);CHKERRQ(ierr);
3542   if (!rowb){
3543     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3544   } else {
3545     *rowb = isrowb;
3546   }
3547   if (!colb){
3548     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3549   } else {
3550     *colb = iscolb;
3551   }
3552   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3553   PetscFunctionReturn(0);
3554 }
3555 
3556 static PetscEvent logkey_GetBrowsOfAocols = 0;
3557 #undef __FUNCT__
3558 #define __FUNCT__ "MatGetBrowsOfAoCols"
3559 /*@C
3560     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
3561     of the OFF-DIAGONAL portion of the local A
3562 
3563     Collective on Mat
3564 
3565    Input Parameters:
3566 +    A,B - the matrices in mpiaij format
3567 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3568 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3569 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3570 
3571    Output Parameter:
3572 .    B_oth - the sequential matrix generated
3573 
3574     Level: developer
3575 
3576 @*/
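/*
   A minimal usage sketch (A and B are assumed MPIAIJ matrices with A's column
   ownership matching B's row ownership; startsj and bufa are kept so the same
   communication pattern can be reused when B's values change):

       Mat         B_oth;
       PetscInt    *startsj = PETSC_NULL;
       PetscScalar *bufa    = PETSC_NULL;
       ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
       ... after B's values change ...
       ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
       ierr = PetscFree(startsj);CHKERRQ(ierr);
       ierr = PetscFree(bufa);CHKERRQ(ierr);
       ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/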
3577 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3578 {
3579   VecScatter_MPI_General *gen_to,*gen_from;
3580   PetscErrorCode         ierr;
3581   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3582   Mat_SeqAIJ             *b_oth;
3583   VecScatter             ctx=a->Mvctx;
3584   MPI_Comm               comm=ctx->comm;
3585   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3586   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3587   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3588   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3589   MPI_Request            *rwaits,*swaits;
3590   MPI_Status             *sstatus,rstatus;
3591   PetscInt               *cols;
3592   PetscScalar            *vals;
3593   PetscMPIInt            j;
3594 
3595   PetscFunctionBegin;
3596   if (a->cstart != b->rstart || a->cend != b->rend){
3597     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3598   }
3599   if (!logkey_GetBrowsOfAocols) {
3600     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3601   }
3602   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3603   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3604 
3605   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3606   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3607   rvalues  = gen_from->values; /* holds the length of sending row */
3608   svalues  = gen_to->values;   /* holds the length of receiving row */
3609   nrecvs   = gen_from->n;
3610   nsends   = gen_to->n;
3611   rwaits   = gen_from->requests;
3612   swaits   = gen_to->requests;
3613   srow     = gen_to->indices;   /* local row index to be sent */
3614   rstarts  = gen_from->starts;
3615   sstarts  = gen_to->starts;
3616   rprocs   = gen_from->procs;
3617   sprocs   = gen_to->procs;
3618   sstatus  = gen_to->sstatus;
3619 
3620   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3621   if (scall == MAT_INITIAL_MATRIX){
3622     /* i-array */
3623     /*---------*/
3624     /*  post receives */
3625     for (i=0; i<nrecvs; i++){
3626       rowlen = (PetscInt*)rvalues + rstarts[i];
3627       nrows = rstarts[i+1]-rstarts[i];
3628       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3629     }
3630 
3631     /* pack the outgoing message */
3632     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3633     rstartsj = sstartsj + nsends +1;
3634     sstartsj[0] = 0;  rstartsj[0] = 0;
3635     len = 0; /* total length of j or a array to be sent */
3636     k = 0;
3637     for (i=0; i<nsends; i++){
3638       rowlen = (PetscInt*)svalues + sstarts[i];
3639       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3640       for (j=0; j<nrows; j++) {
3641         row = srow[k] + b->rowners[rank]; /* global row idx */
3642         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3643         len += rowlen[j];
3644         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3645         k++;
3646       }
3647       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3648       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3649     }
3650     /* recvs and sends of i-array are completed */
3651     i = nrecvs;
3652     while (i--) {
3653       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3654     }
3655     if (nsends) {
3656       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3657     }
3658     /* allocate buffers for sending j and a arrays */
3659     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3660     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3661 
3662     /* create i-array of B_oth */
3663     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3664     b_othi[0] = 0;
3665     len = 0; /* total length of j or a array to be received */
3666     k = 0;
3667     for (i=0; i<nrecvs; i++){
3668       rowlen = (PetscInt*)rvalues + rstarts[i];
3669       nrows = rstarts[i+1]-rstarts[i];
3670       for (j=0; j<nrows; j++) {
3671         b_othi[k+1] = b_othi[k] + rowlen[j];
3672         len += rowlen[j]; k++;
3673       }
3674       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3675     }
3676 
3677     /* allocate space for j and a arrays of B_oth */
3678     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3679     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3680 
3681     /* j-array */
3682     /*---------*/
3683     /*  post receives of j-array */
3684     for (i=0; i<nrecvs; i++){
3685       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3686       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3687     }
3688     k = 0;
3689     for (i=0; i<nsends; i++){
3690       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3691       bufJ = bufj+sstartsj[i];
3692       for (j=0; j<nrows; j++) {
3693         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3694         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3695         for (l=0; l<ncols; l++){
3696           *bufJ++ = cols[l];
3697         }
3698         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3699       }
3700       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3701     }
3702 
3703     /* recvs and sends of j-array are completed */
3704     i = nrecvs;
3705     while (i--) {
3706       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3707     }
3708     if (nsends) {
3709       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3710     }
3711   } else if (scall == MAT_REUSE_MATRIX){
3712     sstartsj = *startsj;
3713     rstartsj = sstartsj + nsends +1;
3714     bufa     = *bufa_ptr;
3715     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3716     b_otha   = b_oth->a;
3717   } else {
3718     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3719   }
3720 
3721   /* a-array */
3722   /*---------*/
3723   /*  post receives of a-array */
3724   for (i=0; i<nrecvs; i++){
3725     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3726     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3727   }
3728   k = 0;
3729   for (i=0; i<nsends; i++){
3730     nrows = sstarts[i+1]-sstarts[i];
3731     bufA = bufa+sstartsj[i];
3732     for (j=0; j<nrows; j++) {
3733       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3734       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3735       for (l=0; l<ncols; l++){
3736         *bufA++ = vals[l];
3737       }
3738       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3739 
3740     }
3741     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3742   }
3743   /* recvs and sends of a-array are completed */
3744   i = nrecvs;
3745   while (i--) {
3746     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3747   }
3748   if (nsends) {
3749     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3750   }
3751 
3752   if (scall == MAT_INITIAL_MATRIX){
3753     /* put together the new matrix */
3754     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3755 
3756     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3757     /* Since these are PETSc arrays, change flags to free them as necessary. */
3758     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3759     b_oth->freedata = PETSC_TRUE;
3760     b_oth->nonew    = 0;
3761 
3762     ierr = PetscFree(bufj);CHKERRQ(ierr);
3763     if (!startsj || !bufa_ptr){
3764       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3765       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3766     } else {
3767       *startsj  = sstartsj;
3768       *bufa_ptr = bufa;
3769     }
3770   }
3771   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3772 
3773   PetscFunctionReturn(0);
3774 }
3775 
3776 /*MC
3777    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3778 
3779    Options Database Keys:
3780 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3781 
3782   Level: beginner
3783 
3784 .seealso: MatCreateMPIAIJ
3785 M*/
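/*
   A minimal sketch of selecting this type programmatically (the sizes m,n,M,N and
   the preallocation values below are assumptions for illustration):

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,m,n,M,N,&A);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   or equivalently run with -mat_type mpiaij after calling MatSetFromOptions().
*/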
3786 
3787 EXTERN_C_BEGIN
3788 #undef __FUNCT__
3789 #define __FUNCT__ "MatCreate_MPIAIJ"
3790 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3791 {
3792   Mat_MPIAIJ     *b;
3793   PetscErrorCode ierr;
3794   PetscInt       i;
3795   PetscMPIInt    size;
3796 
3797   PetscFunctionBegin;
3798   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3799 
3800   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3801   B->data         = (void*)b;
3802   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3803   B->factor       = 0;
3804   B->bs           = 1;
3805   B->assembled    = PETSC_FALSE;
3806   B->mapping      = 0;
3807 
3808   B->insertmode      = NOT_SET_VALUES;
3809   b->size            = size;
3810   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3811 
3812   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3813   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3814 
3815   /* the information in the maps duplicates the information computed below, eventually
3816      we should remove the duplicate information that is not contained in the maps */
3817   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3818   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3819 
3820   /* build local table of row and column ownerships */
3821   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3822   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3823   b->cowners = b->rowners + b->size + 2;
3824   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3825   b->rowners[0] = 0;
3826   for (i=2; i<=b->size; i++) {
3827     b->rowners[i] += b->rowners[i-1];
3828   }
3829   b->rstart = b->rowners[b->rank];
3830   b->rend   = b->rowners[b->rank+1];
3831   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3832   b->cowners[0] = 0;
3833   for (i=2; i<=b->size; i++) {
3834     b->cowners[i] += b->cowners[i-1];
3835   }
3836   b->cstart = b->cowners[b->rank];
3837   b->cend   = b->cowners[b->rank+1];
3838 
3839   /* build cache for off array entries formed */
3840   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3841   b->donotstash  = PETSC_FALSE;
3842   b->colmap      = 0;
3843   b->garray      = 0;
3844   b->roworiented = PETSC_TRUE;
3845 
3846   /* stuff used for matrix vector multiply */
3847   b->lvec      = PETSC_NULL;
3848   b->Mvctx     = PETSC_NULL;
3849 
3850   /* stuff for MatGetRow() */
3851   b->rowindices   = 0;
3852   b->rowvalues    = 0;
3853   b->getrowactive = PETSC_FALSE;
3854 
3855   /* Explicitly create 2 MATSEQAIJ matrices. */
3856   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3857   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3858   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3859   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3860   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3861   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3862 
3863   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3864                                      "MatStoreValues_MPIAIJ",
3865                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3866   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3867                                      "MatRetrieveValues_MPIAIJ",
3868                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3869   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3870 				     "MatGetDiagonalBlock_MPIAIJ",
3871                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3872   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3873 				     "MatIsTranspose_MPIAIJ",
3874 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3875   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3876 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3877 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3878   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3879 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3880 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3881   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3882 				     "MatDiagonalScaleLocal_MPIAIJ",
3883 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3884   PetscFunctionReturn(0);
3885 }
3886 EXTERN_C_END
3887