/* xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a96a251defc07734f733d1abf27b5765e87fdc8b) */
#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable
  at a slightly higher hash-table cost; without it, it is not scalable (each
  processor has an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

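/*
   Illustrative sketch (not compiled): how the colmap built above translates
   a global column index into a local index in the off-diagonal block B.
   The names gcol/lcol are hypothetical; the lookup mirrors the two branches
   of CreateColmap_MPIAIJ_Private(), where entries are stored shifted by one
   so that a 0 entry can mean "not present".
*/
#if 0
PetscInt lcol;                                /* local column in B, or negative if absent */
#if defined (PETSC_USE_CTABLE)
ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
lcol--;                                       /* stored as lcol+1 */
#else
lcol = aij->colmap[gcol] - 1;                 /* zeroed entry means absent */
#endif
if (lcol < 0) { /* gcol is not a nonzero column of B on this process */ }
#endif
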
#define CHUNKSIZE   15
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else             low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value; \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      } \
      if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
      if (nonew == 1) goto a_noinsert; \
      else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow1 >= rmax1) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
        PetscScalar *new_a; \
 \
        if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        ierr = PetscMalloc3(new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,am+1,PetscInt,&new_i);CHKERRQ(ierr);\
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow1)*sizeof(PetscInt));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[row] - nrow1); \
        ierr = PetscMemcpy(new_j+ai[row]+nrow1+CHUNKSIZE,aj+ai[row]+nrow1,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow1)*sizeof(PetscScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+ai[row]+nrow1+CHUNKSIZE,aa+ai[row]+nrow1,len*sizeof(PetscScalar));CHKERRQ(ierr); \
        /* free up old matrix storage */ \
 \
        if (!a->singlemalloc) {\
          ierr = PetscFree(a->a);CHKERRQ(ierr);\
          ierr = PetscFree(a->i);CHKERRQ(ierr);\
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } else {\
          ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr);\
        }\
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp1   = aj + ai[row]; ap1 = aa + ai[row]; \
        rmax1 = aimax[row] = aimax[row] + CHUNKSIZE; \
        ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
        a->maxnz += CHUNKSIZE; \
        a->reallocs++; \
      } \
      N = nrow1++ - 1; a->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col; \
      ap1[_i] = value; \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
    lastcol2 = col;\
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else             low2  = t; \
    } \
      for (_i=low2; _i<high2; _i++) { \
        if (rp2[_i] > col) break; \
        if (rp2[_i] == col) { \
          if (addv == ADD_VALUES) ap2[_i] += value; \
          else                    ap2[_i] = value; \
          goto b_noinsert; \
        } \
      } \
      if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
      if (nonew == 1) goto b_noinsert; \
      else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow2 >= rmax2) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
        PetscScalar *new_a; \
 \
        if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        ierr = PetscMalloc3(new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,bm+1,PetscInt,&new_i);CHKERRQ(ierr);\
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow2)*sizeof(PetscInt));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - bi[row] - nrow2); \
        ierr = PetscMemcpy(new_j+bi[row]+nrow2+CHUNKSIZE,bj+bi[row]+nrow2,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow2)*sizeof(PetscScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bi[row]+nrow2+CHUNKSIZE,ba+bi[row]+nrow2,len*sizeof(PetscScalar));CHKERRQ(ierr); \
        /* free up old matrix storage */ \
 \
        if (!b->singlemalloc) {\
          ierr = PetscFree(b->a);CHKERRQ(ierr);\
          ierr = PetscFree(b->i);CHKERRQ(ierr);\
          ierr = PetscFree(b->j);CHKERRQ(ierr);\
        } else {\
          ierr = PetscFree3(b->a,b->i,b->j);CHKERRQ(ierr);\
        }\
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp2   = bj + bi[row]; ap2 = ba + bi[row]; \
        rmax2 = bimax[row] = bimax[row] + CHUNKSIZE; \
        ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
        b->maxnz += CHUNKSIZE; \
        b->reallocs++; \
      } \
      N = nrow2++ - 1; b->nz++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp2[ii+1] = rp2[ii]; \
        ap2[ii+1] = ap2[ii]; \
      } \
      rp2[_i] = col; \
      ap2[_i] = value; \
      b_noinsert: ; \
      bilen[row] = nrow2; \
}

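/*
   Illustrative sketch (not compiled): the insertion pattern the two macros
   above implement, written out as a plain function for a single sorted row.
   The names rp/ap/nrow are hypothetical stand-ins for rp1/ap1/nrow1; the
   real macros additionally handle reallocation, the nonew modes, and the
   ignorezeroentries optimization.
*/
#if 0
static void RowInsert(PetscInt *rp,PetscScalar *ap,PetscInt *nrow,PetscInt col,PetscScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i,ii;
  while (high-low > 5) {                  /* coarse binary search */
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {              /* finish with a linear scan */
    if (rp[i] > col) break;
    if (rp[i] == col) {                   /* existing nonzero: update it */
      if (addv == ADD_VALUES) ap[i] += value;
      else                    ap[i]  = value;
      return;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {         /* shift later entries up one slot */
    rp[ii+1] = rp[ii];
    ap[ii+1] = ap[ii];
  }
  rp[i] = col; ap[i] = value; (*nrow)++;  /* place the new nonzero */
}
#endif
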
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba       = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

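/*
   Usage sketch (not compiled): assembling an MPIAIJ matrix with the routine
   above through the public MatSetValues() interface, using the same
   MatCreate(comm,m,n,M,N,...) calling sequence that appears later in this
   file.  Off-process entries are stashed and communicated during assembly;
   the sizes and values here are made up for illustration.
*/
#if 0
Mat         C;
PetscInt    row = 0,col = 0;
PetscScalar one = 1.0;
ierr = MatCreate(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,10,&C);CHKERRQ(ierr);
ierr = MatSetType(C,MATMPIAIJ);CHKERRQ(ierr);
ierr = MatMPIAIJSetPreallocation(C,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
ierr = MatSetValues(C,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr); /* may be off-process */
ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);           /* scatters the stash */
ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatDestroy(C);CHKERRQ(ierr);
#endif
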
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

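/*
   Usage sketch (not compiled): MatGetValues() on an assembled MPIAIJ matrix.
   As the routine above enforces, the requested rows must be locally owned;
   columns may be anywhere, and entries not stored locally come back as 0.0
   (see the colmap/garray test above).  The indices are hypothetical.
*/
#if 0
PetscInt    rstart,rend,row,cols[2];
PetscScalar vals[2];
ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
row = rstart; cols[0] = 0; cols[1] = 1;
ierr = MatGetValues(C,1,&row,2,cols,vals);CHKERRQ(ierr);
#endif
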
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in the same insert mode (INSERT_VALUES or ADD_VALUES) */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  a->compressedrow.use     = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     If the nonzero structure of submatrix B cannot change, then we know that
     no processor disassembled, so we can skip this step.
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* re-access b because aij->B may have been changed in MatSetValues() or DisAssemble() */
  b    = (Mat_SeqAIJ *)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

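/*
   Usage sketch (not compiled): the insert-mode consistency that
   MatAssemblyBegin_MPIAIJ() checks with MPI_Allreduce() above means code
   that mixes INSERT_VALUES and ADD_VALUES must separate the two phases
   with a flush assembly, e.g.
*/
#if 0
ierr = MatSetValues(C,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
ierr = MatAssemblyBegin(C,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(C,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
ierr = MatSetValues(C,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif
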
#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
#if defined(PETSC_USE_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_USE_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_USE_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory with lots of mallocs and slow
     things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

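/*
   Usage sketch (not compiled): zeroing rows through the public interface.
   In this PETSc version MatZeroRows() takes an IS of global row numbers and
   an optional pointer to a diagonal value; rows listed here need not be
   locally owned, since the routine above routes them to their owners.  The
   row numbers are made up.
*/
#if 0
IS          is;
PetscInt    rows[2] = {0,5};
PetscScalar diag = 1.0;
ierr = ISCreateGeneral(PETSC_COMM_WORLD,2,rows,&is);CHKERRQ(ierr);
ierr = MatZeroRows(C,is,&diag);CHKERRQ(ierr); /* pass 0 to leave the diagonal zeroed too */
ierr = ISDestroy(is);CHKERRQ(ierr);
#endif
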
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

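/*
   Usage sketch (not compiled): the layout check in MatMult_MPIAIJ() above
   requires x to be distributed like the matrix columns and y like the rows.
   With m x n local (M x N global) sizes this looks like the following; note
   how the routine overlaps the lvec scatter with the diagonal-block multiply.
*/
#if 0
Vec x,y;
ierr = VecCreateMPI(PETSC_COMM_WORLD,n,N,&x);CHKERRQ(ierr); /* column layout */
ierr = VecCreateMPI(PETSC_COMM_WORLD,m,M,&y);CHKERRQ(ierr); /* row layout */
ierr = MatMult(C,x,y);CHKERRQ(ierr);
#endif
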
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually added into yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest any processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

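/*
   Usage sketch (not compiled): the writer above is reached through MatView()
   with a binary viewer.  The file-mode enumerator name has changed across
   PETSc releases (PETSC_FILE_CREATE in some, FILE_MODE_WRITE in others), so
   treat the flag below as an assumption for this version.
*/
#if 0
PetscViewer viewer;
ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_CREATE,&viewer);CHKERRQ(ierr);
ierr = MatView(C,viewer);CHKERRQ(ierr);   /* rank 0 gathers and writes row lengths, indices, values */
ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
#endif
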
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt   inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->N;

  PetscFunctionReturn(0);
}

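/*
   Usage sketch (not compiled): querying the statistics assembled above.
   MAT_LOCAL reports this process only; MAT_GLOBAL_SUM and MAT_GLOBAL_MAX
   reduce over the communicator as in the two MPI_Allreduce() calls.
*/
#if 0
MatInfo info;
ierr = MatGetInfo(C,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, mallocs %g\n",info.nz_used,info.mallocs);CHKERRQ(ierr);
#endif
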
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatNorm_MPIAIJ"
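/*
   Norms computed below, with the matrix distributed by rows across the communicator:
     NORM_FROBENIUS:  ||A||_F   = sqrt(sum_ij |a_ij|^2)  (local sums of squares, combined with MPI_SUM)
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|     (column sums combined with MPI_SUM, then the max)
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|     (local row sums, combined with MPI_MAX)
*/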
1344 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1345 {
1346   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1347   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1348   PetscErrorCode ierr;
1349   PetscInt       i,j,cstart = aij->cstart;
1350   PetscReal      sum = 0.0;
1351   PetscScalar    *v;
1352 
1353   PetscFunctionBegin;
1354   if (aij->size == 1) {
1355     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1356   } else {
1357     if (type == NORM_FROBENIUS) {
1358       v = amat->a;
1359       for (i=0; i<amat->nz; i++) {
1360 #if defined(PETSC_USE_COMPLEX)
1361         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1362 #else
1363         sum += (*v)*(*v); v++;
1364 #endif
1365       }
1366       v = bmat->a;
1367       for (i=0; i<bmat->nz; i++) {
1368 #if defined(PETSC_USE_COMPLEX)
1369         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1370 #else
1371         sum += (*v)*(*v); v++;
1372 #endif
1373       }
1374       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1375       *norm = sqrt(*norm);
1376     } else if (type == NORM_1) { /* max column norm */
1377       PetscReal *tmp,*tmp2;
1378       PetscInt    *jj,*garray = aij->garray;
1379       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1380       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1381       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1382       *norm = 0.0;
1383       v = amat->a; jj = amat->j;
1384       for (j=0; j<amat->nz; j++) {
1385         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1386       }
1387       v = bmat->a; jj = bmat->j;
1388       for (j=0; j<bmat->nz; j++) {
1389         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1390       }
1391       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1392       for (j=0; j<mat->N; j++) {
1393         if (tmp2[j] > *norm) *norm = tmp2[j];
1394       }
1395       ierr = PetscFree(tmp);CHKERRQ(ierr);
1396       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1397     } else if (type == NORM_INFINITY) { /* max row norm */
1398       PetscReal ntemp = 0.0;
1399       for (j=0; j<aij->A->m; j++) {
1400         v = amat->a + amat->i[j];
1401         sum = 0.0;
1402         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1403           sum += PetscAbsScalar(*v); v++;
1404         }
1405         v = bmat->a + bmat->i[j];
1406         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1407           sum += PetscAbsScalar(*v); v++;
1408         }
1409         if (sum > ntemp) ntemp = sum;
1410       }
1411       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1412     } else {
1413       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1414     }
1415   }
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatTranspose_MPIAIJ"
1421 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1422 {
1423   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1424   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1425   PetscErrorCode ierr;
1426   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1427   Mat            B;
1428   PetscScalar    *array;
1429 
1430   PetscFunctionBegin;
1431   if (!matout && M != N) {
1432     SETERRQ(PETSC_ERR_ARG_SIZ,"In-place transpose requires a square matrix");
1433   }
1434 
1435   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1436   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1437   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1438 
1439   /* copy over the A part */
1440   Aloc = (Mat_SeqAIJ*)a->A->data;
1441   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1442   row = a->rstart;
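  /* temporarily shift the diagonal block's local column indices to global
     numbering; each local row of A is then inserted as a column of B */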
1443   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart;}
1444   for (i=0; i<m; i++) {
1445     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1446     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1447   }
1448   aj = Aloc->j;
1449   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart;}
1450 
1451   /* copy over the B part */
1452   Aloc = (Mat_SeqAIJ*)a->B->data;
1453   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1454   row  = a->rstart;
1455   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1456   ct   = cols;
1457   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1458   for (i=0; i<m; i++) {
1459     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1460     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1461   }
1462   ierr = PetscFree(ct);CHKERRQ(ierr);
1463   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1464   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1465   if (matout) {
1466     *matout = B;
1467   } else {
1468     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1469   }
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
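/*
   Computes mat <- diag(ll) * mat * diag(rr).  The right scaling of the
   off-diagonal block needs the ghost entries of rr, so the scatter that
   fills aij->lvec is started first and overlapped with the local scaling.
*/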
1475 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1476 {
1477   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1478   Mat            a = aij->A,b = aij->B;
1479   PetscErrorCode ierr;
1480   PetscInt       s1,s2,s3;
1481 
1482   PetscFunctionBegin;
1483   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1484   if (rr) {
1485     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1486     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector has non-conforming local size");
1487     /* Overlap communication with computation. */
1488     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1489   }
1490   if (ll) {
1491     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1492     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector has non-conforming local size");
1493     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1494   }
1495   /* scale the diagonal block */
1496   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1497 
1498   if (rr) {
1499     /* Do a scatter end and then right scale the off-diagonal block */
1500     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1501     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1502   }
1503 
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 
1508 #undef __FUNCT__
1509 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1510 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1511 {
1512   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1513   PetscErrorCode ierr;
1514 
1515   PetscFunctionBegin;
1516   if (!a->rank) {
1517     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1518   }
1519   PetscFunctionReturn(0);
1520 }
1521 
1522 #undef __FUNCT__
1523 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1524 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1525 {
1526   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1527   PetscErrorCode ierr;
1528 
1529   PetscFunctionBegin;
1530   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1531   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1532   PetscFunctionReturn(0);
1533 }
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1536 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1537 {
1538   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1539   PetscErrorCode ierr;
1540 
1541   PetscFunctionBegin;
1542   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatEqual_MPIAIJ"
1548 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1549 {
1550   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1551   Mat            a,b,c,d;
1552   PetscTruth     flg;
1553   PetscErrorCode ierr;
1554 
1555   PetscFunctionBegin;
1556   a = matA->A; b = matA->B;
1557   c = matB->A; d = matB->B;
1558 
1559   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1560   if (flg) {
1561     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1562   }
1563   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatCopy_MPIAIJ"
1569 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1570 {
1571   PetscErrorCode ierr;
1572   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1573   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1574 
1575   PetscFunctionBegin;
1576   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1577   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1578     /* Because of the column compression in the off-process part of the matrix a->B,
1579        the number of columns in a->B and b->B may be different, so MatCopy() cannot be
1580        called directly on the two parts. If need be, a copy more efficient than
1581        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
1582        and then copying the submatrices. */
1583     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1584   } else {
1585     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1586     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1593 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1594 {
1595   PetscErrorCode ierr;
1596 
1597   PetscFunctionBegin;
1598   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #include "petscblaslapack.h"
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatAXPY_MPIAIJ"
1605 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1606 {
1607   PetscErrorCode ierr;
1608   PetscInt       i;
1609   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1610   PetscBLASInt   bnz,one=1;
1611   Mat_SeqAIJ     *x,*y;
1612 
1613   PetscFunctionBegin;
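  /* Y <- a*X + Y.  When X and Y have identical nonzero patterns their value
     arrays line up entry for entry, so one BLAS axpy per block suffices. */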
1614   if (str == SAME_NONZERO_PATTERN) {
1615     x = (Mat_SeqAIJ *)xx->A->data;
1616     y = (Mat_SeqAIJ *)yy->A->data;
1617     bnz = (PetscBLASInt)x->nz;
1618     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1619     x = (Mat_SeqAIJ *)xx->B->data;
1620     y = (Mat_SeqAIJ *)yy->B->data;
1621     bnz = (PetscBLASInt)x->nz;
1622     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1623   } else if (str == SUBSET_NONZERO_PATTERN) {
1624     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1625 
1626     x = (Mat_SeqAIJ *)xx->B->data;
1627     y = (Mat_SeqAIJ *)yy->B->data;
1628     if (y->xtoy && y->XtoY != xx->B) {
1629       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1630       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1631     }
1632     if (!y->xtoy) { /* get xtoy */
1633       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1634       y->XtoY = xx->B;
1635     }
1636     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1637   } else {
1638     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1639   }
1640   PetscFunctionReturn(0);
1641 }
1642 
1643 /* -------------------------------------------------------------------*/
1644 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1645        MatGetRow_MPIAIJ,
1646        MatRestoreRow_MPIAIJ,
1647        MatMult_MPIAIJ,
1648 /* 4*/ MatMultAdd_MPIAIJ,
1649        MatMultTranspose_MPIAIJ,
1650        MatMultTransposeAdd_MPIAIJ,
1651        0,
1652        0,
1653        0,
1654 /*10*/ 0,
1655        0,
1656        0,
1657        MatRelax_MPIAIJ,
1658        MatTranspose_MPIAIJ,
1659 /*15*/ MatGetInfo_MPIAIJ,
1660        MatEqual_MPIAIJ,
1661        MatGetDiagonal_MPIAIJ,
1662        MatDiagonalScale_MPIAIJ,
1663        MatNorm_MPIAIJ,
1664 /*20*/ MatAssemblyBegin_MPIAIJ,
1665        MatAssemblyEnd_MPIAIJ,
1666        0,
1667        MatSetOption_MPIAIJ,
1668        MatZeroEntries_MPIAIJ,
1669 /*25*/ MatZeroRows_MPIAIJ,
1670        0,
1671        0,
1672        0,
1673        0,
1674 /*30*/ MatSetUpPreallocation_MPIAIJ,
1675        0,
1676        0,
1677        0,
1678        0,
1679 /*35*/ MatDuplicate_MPIAIJ,
1680        0,
1681        0,
1682        0,
1683        0,
1684 /*40*/ MatAXPY_MPIAIJ,
1685        MatGetSubMatrices_MPIAIJ,
1686        MatIncreaseOverlap_MPIAIJ,
1687        MatGetValues_MPIAIJ,
1688        MatCopy_MPIAIJ,
1689 /*45*/ MatPrintHelp_MPIAIJ,
1690        MatScale_MPIAIJ,
1691        0,
1692        0,
1693        0,
1694 /*50*/ MatSetBlockSize_MPIAIJ,
1695        0,
1696        0,
1697        0,
1698        0,
1699 /*55*/ MatFDColoringCreate_MPIAIJ,
1700        0,
1701        MatSetUnfactored_MPIAIJ,
1702        0,
1703        0,
1704 /*60*/ MatGetSubMatrix_MPIAIJ,
1705        MatDestroy_MPIAIJ,
1706        MatView_MPIAIJ,
1707        MatGetPetscMaps_Petsc,
1708        0,
1709 /*65*/ 0,
1710        0,
1711        0,
1712        0,
1713        0,
1714 /*70*/ 0,
1715        0,
1716        MatSetColoring_MPIAIJ,
1717 #if defined(PETSC_HAVE_ADIC)
1718        MatSetValuesAdic_MPIAIJ,
1719 #else
1720        0,
1721 #endif
1722        MatSetValuesAdifor_MPIAIJ,
1723 /*75*/ 0,
1724        0,
1725        0,
1726        0,
1727        0,
1728 /*80*/ 0,
1729        0,
1730        0,
1731        0,
1732 /*84*/ MatLoad_MPIAIJ,
1733        0,
1734        0,
1735        0,
1736        0,
1737        0,
1738 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1739        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1740        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1741        MatPtAP_Basic,
1742        MatPtAPSymbolic_MPIAIJ,
1743 /*95*/ MatPtAPNumeric_MPIAIJ,
1744        0,
1745        0,
1746        0,
1747        0,
1748 /*100*/0,
1749        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1750        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1751 };
1752 
1753 /* ----------------------------------------------------------------------------------------*/
1754 
1755 EXTERN_C_BEGIN
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1758 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1759 {
1760   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1761   PetscErrorCode ierr;
1762 
1763   PetscFunctionBegin;
1764   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1765   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 EXTERN_C_END
1769 
1770 EXTERN_C_BEGIN
1771 #undef __FUNCT__
1772 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1773 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1774 {
1775   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1776   PetscErrorCode ierr;
1777 
1778   PetscFunctionBegin;
1779   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1780   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1781   PetscFunctionReturn(0);
1782 }
1783 EXTERN_C_END
1784 
1785 #include "petscpc.h"
1786 EXTERN_C_BEGIN
1787 #undef __FUNCT__
1788 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1789 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1790 {
1791   Mat_MPIAIJ     *b;
1792   PetscErrorCode ierr;
1793   PetscInt       i;
1794 
1795   PetscFunctionBegin;
1796   B->preallocated = PETSC_TRUE;
1797   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1798   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1799   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1800   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1801   if (d_nnz) {
1802     for (i=0; i<B->m; i++) {
1803       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1804     }
1805   }
1806   if (o_nnz) {
1807     for (i=0; i<B->m; i++) {
1808       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1809     }
1810   }
1811   b = (Mat_MPIAIJ*)B->data;
1812   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1813   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1814 
1815   PetscFunctionReturn(0);
1816 }
1817 EXTERN_C_END
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1821 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1822 {
1823   Mat            mat;
1824   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1825   PetscErrorCode ierr;
1826 
1827   PetscFunctionBegin;
1828   *newmat       = 0;
1829   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1830   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1831   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1832   a    = (Mat_MPIAIJ*)mat->data;
1833 
1834   mat->factor       = matin->factor;
1835   mat->bs           = matin->bs;
1836   mat->assembled    = PETSC_TRUE;
1837   mat->insertmode   = NOT_SET_VALUES;
1838   mat->preallocated = PETSC_TRUE;
1839 
1840   a->rstart       = oldmat->rstart;
1841   a->rend         = oldmat->rend;
1842   a->cstart       = oldmat->cstart;
1843   a->cend         = oldmat->cend;
1844   a->size         = oldmat->size;
1845   a->rank         = oldmat->rank;
1846   a->donotstash   = oldmat->donotstash;
1847   a->roworiented  = oldmat->roworiented;
1848   a->rowindices   = 0;
1849   a->rowvalues    = 0;
1850   a->getrowactive = PETSC_FALSE;
1851 
1852   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1853   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1854   if (oldmat->colmap) {
1855 #if defined (PETSC_USE_CTABLE)
1856     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1857 #else
1858     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1859     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1860     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1861 #endif
1862   } else a->colmap = 0;
1863   if (oldmat->garray) {
1864     PetscInt len;
1865     len  = oldmat->B->n;
1866     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1867     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1868     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1869   } else a->garray = 0;
1870 
1871   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1872   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1873   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1874   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1875   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1876   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1877   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1878   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1879   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1880   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1881   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1882   *newmat = mat;
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 #include "petscsys.h"
1887 
1888 #undef __FUNCT__
1889 #define __FUNCT__ "MatLoad_MPIAIJ"
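/*
   Loads a matrix stored in the PETSc binary format: a four-entry integer
   header (cookie, number of rows, number of columns, total number of
   nonzeros), followed by the row lengths, the column indices, and finally
   the numerical values.  Process 0 reads the file and ships each of the
   other processes its piece.
*/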
1890 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1891 {
1892   Mat            A;
1893   PetscScalar    *vals,*svals;
1894   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1895   MPI_Status     status;
1896   PetscErrorCode ierr;
1897   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1898   PetscInt       i,nz,j,rstart,rend,mmax;
1899   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1900   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1901   PetscInt       cend,cstart,n,*rowners;
1902   int            fd;
1903 
1904   PetscFunctionBegin;
1905   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1906   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1907   if (!rank) {
1908     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1909     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1910     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1911   }
1912 
1913   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1914   M = header[1]; N = header[2];
1915   /* determine ownership of all rows */
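  /* each process gets M/size rows; the first M%size processes take one extra */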
1916   m    = M/size + ((M % size) > rank);
1917   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1918   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1919 
1920   /* First process needs enough room for the process with the most rows */
1921   if (!rank) {
1922     mmax       = rowners[1];
1923     for (i=2; i<size; i++) {
1924       mmax = PetscMax(mmax,rowners[i]);
1925     }
1926   } else mmax = m;
1927 
1928   rowners[0] = 0;
1929   for (i=2; i<=size; i++) {
1930     mmax       = PetscMax(mmax,rowners[i]);
1931     rowners[i] += rowners[i-1];
1932   }
1933   rstart = rowners[rank];
1934   rend   = rowners[rank+1];
1935 
1936   /* distribute row lengths to all processors */
1937   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1938   if (!rank) {
1939     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1940     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1941     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1942     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1943     for (j=0; j<m; j++) {
1944       procsnz[0] += ourlens[j];
1945     }
1946     for (i=1; i<size; i++) {
1947       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1948       /* calculate the number of nonzeros on each processor */
1949       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1950         procsnz[i] += rowlengths[j];
1951       }
1952       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1953     }
1954     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1955   } else {
1956     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1957   }
1958 
1959   if (!rank) {
1960     /* determine max buffer needed and allocate it */
1961     maxnz = 0;
1962     for (i=0; i<size; i++) {
1963       maxnz = PetscMax(maxnz,procsnz[i]);
1964     }
1965     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1966 
1967     /* read in my part of the matrix column indices  */
1968     nz   = procsnz[0];
1969     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1970     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1971 
1972     /* read in everyone else's part and ship it off */
1973     for (i=1; i<size; i++) {
1974       nz   = procsnz[i];
1975       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1976       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1977     }
1978     ierr = PetscFree(cols);CHKERRQ(ierr);
1979   } else {
1980     /* determine buffer space needed for message */
1981     nz = 0;
1982     for (i=0; i<m; i++) {
1983       nz += ourlens[i];
1984     }
1985     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1986 
1987     /* receive message of column indices */
1988     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1989     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1990     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1991   }
1992 
1993   /* determine column ownership if matrix is not square */
1994   if (N != M) {
1995     n      = N/size + ((N % size) > rank);
1996     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1997     cstart = cend - n;
1998   } else {
1999     cstart = rstart;
2000     cend   = rend;
2001     n      = cend - cstart;
2002   }
2003 
2004   /* loop over local rows, determining the number of off-diagonal entries */
2005   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
2006   jj = 0;
2007   for (i=0; i<m; i++) {
2008     for (j=0; j<ourlens[i]; j++) {
2009       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2010       jj++;
2011     }
2012   }
2013 
2014   /* create our matrix */
2015   for (i=0; i<m; i++) {
2016     ourlens[i] -= offlens[i];
2017   }
2018   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2019   ierr = MatSetType(A,type);CHKERRQ(ierr);
2020   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2021 
2022   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2023   for (i=0; i<m; i++) {
2024     ourlens[i] += offlens[i];
2025   }
2026 
2027   if (!rank) {
2028     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2029 
2030     /* read in my part of the matrix numerical values  */
2031     nz   = procsnz[0];
2032     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2033 
2034     /* insert into matrix */
2035     jj      = rstart;
2036     smycols = mycols;
2037     svals   = vals;
2038     for (i=0; i<m; i++) {
2039       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2040       smycols += ourlens[i];
2041       svals   += ourlens[i];
2042       jj++;
2043     }
2044 
2045     /* read in other processors and ship out */
2046     for (i=1; i<size; i++) {
2047       nz   = procsnz[i];
2048       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2049       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2050     }
2051     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2052   } else {
2053     /* receive numeric values */
2054     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2055 
2056     /* receive message of values */
2057     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2058     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2059     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2060 
2061     /* insert into matrix */
2062     jj      = rstart;
2063     smycols = mycols;
2064     svals   = vals;
2065     for (i=0; i<m; i++) {
2066       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2067       smycols += ourlens[i];
2068       svals   += ourlens[i];
2069       jj++;
2070     }
2071   }
2072   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2073   ierr = PetscFree(vals);CHKERRQ(ierr);
2074   ierr = PetscFree(mycols);CHKERRQ(ierr);
2075   ierr = PetscFree(rowners);CHKERRQ(ierr);
2076 
2077   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   *newmat = A;
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 #undef __FUNCT__
2084 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2085 /*
2086     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2087   matrix, and then the end result obtained by concatenating the local matrices.
2088   Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
2089 */
2090 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2091 {
2092   PetscErrorCode ierr;
2093   PetscMPIInt    rank,size;
2094   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2095   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2096   Mat            *local,M,Mreuse;
2097   PetscScalar    *vwork,*aa;
2098   MPI_Comm       comm = mat->comm;
2099   Mat_SeqAIJ     *aij;
2100 
2101 
2102   PetscFunctionBegin;
2103   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2104   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2105 
2106   if (call ==  MAT_REUSE_MATRIX) {
2107     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2108     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2109     local = &Mreuse;
2110     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2111   } else {
2112     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2113     Mreuse = *local;
2114     ierr   = PetscFree(local);CHKERRQ(ierr);
2115   }
2116 
2117   /*
2118       m - number of local rows
2119       n - number of columns (same on all processors)
2120       rstart - first row in new global matrix generated
2121   */
2122   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2123   if (call == MAT_INITIAL_MATRIX) {
2124     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2125     ii  = aij->i;
2126     jj  = aij->j;
2127 
2128     /*
2129         Determine the number of non-zeros in the diagonal and off-diagonal
2130         portions of the matrix in order to do correct preallocation
2131     */
2132 
2133     /* first get start and end of "diagonal" columns */
2134     if (csize == PETSC_DECIDE) {
2135       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2136       if (mglobal == n) { /* square matrix */
2137         nlocal = m;
2138       } else {
2139         nlocal = n/size + ((n % size) > rank);
2140       }
2141     } else {
2142       nlocal = csize;
2143     }
2144     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2145     rstart = rend - nlocal;
2146     if (rank == size - 1 && rend != n) {
2147       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2148     }
2149 
2150     /* next, compute all the lengths */
2151     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2152     olens = dlens + m;
2153     for (i=0; i<m; i++) {
2154       jend = ii[i+1] - ii[i];
2155       olen = 0;
2156       dlen = 0;
2157       for (j=0; j<jend; j++) {
2158         if (*jj < rstart || *jj >= rend) olen++;
2159         else dlen++;
2160         jj++;
2161       }
2162       olens[i] = olen;
2163       dlens[i] = dlen;
2164     }
2165     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2166     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2167     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2168     ierr = PetscFree(dlens);CHKERRQ(ierr);
2169   } else {
2170     PetscInt ml,nl;
2171 
2172     M = *newmat;
2173     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2174     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2175     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2176     /*
2177          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2178        rather than the slower MatSetValues().
2179     */
2180     M->was_assembled = PETSC_TRUE;
2181     M->assembled     = PETSC_FALSE;
2182   }
2183   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2184   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2185   ii  = aij->i;
2186   jj  = aij->j;
2187   aa  = aij->a;
2188   for (i=0; i<m; i++) {
2189     row   = rstart + i;
2190     nz    = ii[i+1] - ii[i];
2191     cwork = jj;     jj += nz;
2192     vwork = aa;     aa += nz;
2193     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2194   }
2195 
2196   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2197   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2198   *newmat = M;
2199 
2200   /* save submatrix used in processor for next request */
2201   if (call ==  MAT_INITIAL_MATRIX) {
2202     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2203     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2204   }
2205 
2206   PetscFunctionReturn(0);
2207 }
2208 
2209 EXTERN_C_BEGIN
2210 #undef __FUNCT__
2211 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2212 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2213 {
2214   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2215   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2216   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2217   const PetscInt *JJ;
2218   PetscScalar    *values;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222 #if defined(PETSC_OPT_g)
2223   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2224 #endif
2225   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2226   o_nnz = d_nnz + m;
2227 
2228   for (i=0; i<m; i++) {
2229     nnz     = I[i+1] - I[i];
2230     JJ      = J + I[i];
2231     nnz_max = PetscMax(nnz_max,nnz);
2232 #if defined(PETSC_OPT_g)
2233     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2234 #endif
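    /* skip the columns left of the diagonal block, then count those inside
       [cstart,cend); the remainder of the row is off-diagonal */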
2235     for (j=0; j<nnz; j++) {
2236       if (*JJ >= cstart) break;
2237       JJ++;
2238     }
2239     d = 0;
2240     for (; j<nnz; j++) {
2241       if (*JJ++ >= cend) break;
2242       d++;
2243     }
2244     d_nnz[i] = d;
2245     o_nnz[i] = nnz - d;
2246   }
2247   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2248   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2249 
2250   if (v) values = (PetscScalar*)v;
2251   else {
2252     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2253     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2254   }
2255 
2256   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2257   for (i=0; i<m; i++) {
2258     ii   = i + rstart;
2259     nnz  = I[i+1] - I[i];
2260     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2261   }
2262   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2263   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2264   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2265 
2266   if (!v) {
2267     ierr = PetscFree(values);CHKERRQ(ierr);
2268   }
2269   PetscFunctionReturn(0);
2270 }
2271 EXTERN_C_END
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2275 /*@C
2276    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2277    (the default parallel PETSc format).
2278 
2279    Collective on MPI_Comm
2280 
2281    Input Parameters:
2282 +  A - the matrix
2283 .  i - the indices into j for the start of each local row (starts with zero)
2284 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2285 -  v - optional values in the matrix
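
   Example (illustrative values, not from an actual run): a process owning two
   local rows, with nonzeros in global columns 0 and 2 of its first row and in
   global column 1 of its second row, would pass
.vb
      PetscInt    i[] = {0,2,3};
      PetscInt    j[] = {0,2,1};   /* sorted within each row */
      PetscScalar v[] = {1.0,2.0,3.0};
      MatMPIAIJSetPreallocationCSR(A,i,j,v);
.ve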
2286 
2287    Level: developer
2288 
2289 .keywords: matrix, aij, compressed row, sparse, parallel
2290 
2291 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2292 @*/
2293 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2294 {
2295   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2296 
2297   PetscFunctionBegin;
2298   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2299   if (f) {
2300     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2301   }
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 #undef __FUNCT__
2306 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2307 /*@C
2308    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2309    (the default parallel PETSc format).  For good matrix assembly performance
2310    the user should preallocate the matrix storage by setting the parameters
2311    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2312    performance can be increased by more than a factor of 50.
2313 
2314    Collective on MPI_Comm
2315 
2316    Input Parameters:
2317 +  A - the matrix
2318 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2319            (same value is used for all local rows)
2320 .  d_nnz - array containing the number of nonzeros in the various rows of the
2321            DIAGONAL portion of the local submatrix (possibly different for each row)
2322            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2323            The size of this array is equal to the number of local rows, i.e 'm'.
2324            You must leave room for the diagonal entry even if it is zero.
2325 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2326            submatrix (same value is used for all local rows).
2327 -  o_nnz - array containing the number of nonzeros in the various rows of the
2328            OFF-DIAGONAL portion of the local submatrix (possibly different for
2329            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2330            structure. The size of this array is equal to the number
2331            of local rows, i.e 'm'.
2332 
2333    If the *_nnz parameter is given then the *_nz parameter is ignored
2334 
2335    The AIJ format (also called the Yale sparse matrix format or
2336    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2337    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2338 
2339    The parallel matrix is partitioned such that the first m0 rows belong to
2340    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2341    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2342 
2343    The DIAGONAL portion of the local submatrix of a processor can be defined
2344    as the submatrix obtained by extracting the part corresponding
2345    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2346    first row that belongs to the processor, and r2 is the last row belonging
2347    to this processor. This is a square mxm matrix. The remaining portion
2348    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2349 
2350    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2351 
2352    Example usage:
2353 
2354    Consider the following 8x8 matrix with 34 non-zero values, which is
2355    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2356    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2357    as follows:
2358 
2359 .vb
2360             1  2  0  |  0  3  0  |  0  4
2361     Proc0   0  5  6  |  7  0  0  |  8  0
2362             9  0 10  | 11  0  0  | 12  0
2363     -------------------------------------
2364            13  0 14  | 15 16 17  |  0  0
2365     Proc1   0 18  0  | 19 20 21  |  0  0
2366             0  0  0  | 22 23  0  | 24  0
2367     -------------------------------------
2368     Proc2  25 26 27  |  0  0 28  | 29  0
2369            30  0  0  | 31 32 33  |  0 34
2370 .ve
2371 
2372    This can be represented as a collection of submatrices as:
2373 
2374 .vb
2375       A B C
2376       D E F
2377       G H I
2378 .ve
2379 
2380    Where the submatrices A,B,C are owned by proc0, D,E,F are
2381    owned by proc1, G,H,I are owned by proc2.
2382 
2383    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2384    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2385    The 'M','N' parameters are 8,8, and have the same values on all procs.
2386 
2387    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2388    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2389    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2390    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2391    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2392    matrix, and [DF] as another SeqAIJ matrix.
2393 
2394    When d_nz, o_nz parameters are specified, d_nz storage elements are
2395    allocated for every row of the local diagonal submatrix, and o_nz
2396    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2397    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2398    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2399    In this case, the values of d_nz,o_nz are:
2400 .vb
2401      proc0 : dnz = 2, o_nz = 2
2402      proc1 : dnz = 3, o_nz = 2
2403      proc2 : dnz = 1, o_nz = 4
2404 .ve
2405    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2406    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2407    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2408    34 values.
2409 
2410    When d_nnz, o_nnz parameters are specified, the storage is specified
2411    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2412    In the above case the values for d_nnz,o_nnz are:
2413 .vb
2414      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2415      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2416      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2417 .ve
2418    Here the space allocated is the sum of all the above values, i.e., 34, and
2419    hence the preallocation is perfect.
2420 
2421    Level: intermediate
2422 
2423 .keywords: matrix, aij, compressed row, sparse, parallel
2424 
2425 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2426           MPIAIJ
2427 @*/
2428 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2429 {
2430   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2431 
2432   PetscFunctionBegin;
2433   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2434   if (f) {
2435     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2436   }
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 #undef __FUNCT__
2441 #define __FUNCT__ "MatCreateMPIAIJ"
2442 /*@C
2443    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2444    (the default parallel PETSc format).  For good matrix assembly performance
2445    the user should preallocate the matrix storage by setting the parameters
2446    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2447    performance can be increased by more than a factor of 50.
2448 
2449    Collective on MPI_Comm
2450 
2451    Input Parameters:
2452 +  comm - MPI communicator
2453 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2454            This value should be the same as the local size used in creating the
2455            y vector for the matrix-vector product y = Ax.
2456 .  n - This value should be the same as the local size used in creating the
2457        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2458        calculated if N is given) For square matrices n is almost always m.
2459 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2460 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2461 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2462            (same value is used for all local rows)
2463 .  d_nnz - array containing the number of nonzeros in the various rows of the
2464            DIAGONAL portion of the local submatrix (possibly different for each row)
2465            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2466            The size of this array is equal to the number of local rows, i.e 'm'.
2467            You must leave room for the diagonal entry even if it is zero.
2468 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2469            submatrix (same value is used for all local rows).
2470 -  o_nnz - array containing the number of nonzeros in the various rows of the
2471            OFF-DIAGONAL portion of the local submatrix (possibly different for
2472            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2473            structure. The size of this array is equal to the number
2474            of local rows, i.e 'm'.
2475 
2476    Output Parameter:
2477 .  A - the matrix
2478 
2479    Notes:
2480    If the *_nnz parameter is given then the *_nz parameter is ignored
2481 
2482    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2483    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2484    storage requirements for this matrix.
2485 
2486    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2487    processor than it must be used on all processors that share the object for
2488    that argument.
2489 
2490    The user MUST specify either the local or global matrix dimensions
2491    (possibly both).
2492 
2493    The parallel matrix is partitioned such that the first m0 rows belong to
2494    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2495    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2496 
2497    The DIAGONAL portion of the local submatrix of a processor can be defined
2498    as the submatrix obtained by extracting the part corresponding
2499    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2500    first row that belongs to the processor, and r2 is the last row belonging
2501    to this processor. This is a square mxm matrix. The remaining portion
2502    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2503 
2504    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2505 
2506    When calling this routine with a single process communicator, a matrix of
2507    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2508    type of communicator, use the construction mechanism:
2509      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2510 
2511    By default, this format uses inodes (identical nodes) when possible.
2512    We search for consecutive rows with the same nonzero structure, thereby
2513    reusing matrix information to achieve increased efficiency.
2514 
2515    Options Database Keys:
2516 +  -mat_no_inode  - Do not use inodes
2517 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2518 -  -mat_aij_oneindex - Internally use indexing starting at 1
2519         rather than 0.  Note that when calling MatSetValues(),
2520         the user still MUST index entries starting at 0!
2521 
2522 
2523    Example usage:
2524 
2525    Consider the following 8x8 matrix with 34 non-zero values, which is
2526    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2527    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2528    as follows:
2529 
2530 .vb
2531             1  2  0  |  0  3  0  |  0  4
2532     Proc0   0  5  6  |  7  0  0  |  8  0
2533             9  0 10  | 11  0  0  | 12  0
2534     -------------------------------------
2535            13  0 14  | 15 16 17  |  0  0
2536     Proc1   0 18  0  | 19 20 21  |  0  0
2537             0  0  0  | 22 23  0  | 24  0
2538     -------------------------------------
2539     Proc2  25 26 27  |  0  0 28  | 29  0
2540            30  0  0  | 31 32 33  |  0 34
2541 .ve
2542 
2543    This can be represented as a collection of submatrices as:
2544 
2545 .vb
2546       A B C
2547       D E F
2548       G H I
2549 .ve
2550 
2551    Where the submatrices A,B,C are owned by proc0, D,E,F are
2552    owned by proc1, G,H,I are owned by proc2.
2553 
2554    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2555    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2556    The 'M','N' parameters are 8,8, and have the same values on all procs.
2557 
2558    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2559    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2560    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2561    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2562    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
2563    matrix, and [DF] as another SeqAIJ matrix.
2564 
2565    When d_nz, o_nz parameters are specified, d_nz storage elements are
2566    allocated for every row of the local diagonal submatrix, and o_nz
2567    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2568    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2569    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2570    In this case, the values of d_nz,o_nz are:
2571 .vb
2572      proc0 : dnz = 2, o_nz = 2
2573      proc1 : dnz = 3, o_nz = 2
2574      proc2 : dnz = 1, o_nz = 4
2575 .ve
2576    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2577    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2578    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2579    34 values.
2580 
2581    When d_nnz, o_nnz parameters are specified, the storage is specified
2582    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2583    In the above case the values for d_nnz,o_nnz are:
2584 .vb
2585      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2586      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2587      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2588 .ve
2589    Here the space allocated is the sum of all the above values, i.e., 34, and
2590    hence the preallocation is perfect.
2591 
2592    Level: intermediate
2593 
2594 .keywords: matrix, aij, compressed row, sparse, parallel
2595 
2596 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2597           MPIAIJ
2598 @*/
2599 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2600 {
2601   PetscErrorCode ierr;
2602   PetscMPIInt    size;
2603 
2604   PetscFunctionBegin;
2605   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2606   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2607   if (size > 1) {
2608     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2609     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2610   } else {
2611     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2612     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2613   }
2614   PetscFunctionReturn(0);
2615 }
2616 
2617 #undef __FUNCT__
2618 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2619 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2620 {
2621   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2622 
2623   PetscFunctionBegin;
2624   *Ad     = a->A;
2625   *Ao     = a->B;
2626   *colmap = a->garray;
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 #undef __FUNCT__
2631 #define __FUNCT__ "MatSetColoring_MPIAIJ"
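/*
   Splits a global coloring into separate colorings of the diagonal (A) and
   off-diagonal (B) blocks.  For IS_COLORING_LOCAL the colors are gathered
   from all processes and the entries for the ghost columns (garray) are
   extracted; for IS_COLORING_GHOSTED the supplied ghosted coloring is
   indexed through the matrix's local-to-global mapping instead.
*/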
2632 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2633 {
2634   PetscErrorCode ierr;
2635   PetscInt       i;
2636   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2637 
2638   PetscFunctionBegin;
2639   if (coloring->ctype == IS_COLORING_LOCAL) {
2640     ISColoringValue *allcolors,*colors;
2641     ISColoring      ocoloring;
2642 
2643     /* set coloring for diagonal portion */
2644     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2645 
2646     /* set coloring for off-diagonal portion */
2647     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2648     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2649     for (i=0; i<a->B->n; i++) {
2650       colors[i] = allcolors[a->garray[i]];
2651     }
2652     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2653     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2654     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2655     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2656   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2657     ISColoringValue *colors;
2658     PetscInt        *larray;
2659     ISColoring      ocoloring;
2660 
2661     /* set coloring for diagonal portion */
2662     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2663     for (i=0; i<a->A->n; i++) {
2664       larray[i] = i + a->cstart;
2665     }
2666     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2667     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2668     for (i=0; i<a->A->n; i++) {
2669       colors[i] = coloring->colors[larray[i]];
2670     }
2671     ierr = PetscFree(larray);CHKERRQ(ierr);
2672     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2673     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2674     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2675 
2676     /* set coloring for off-diagonal portion */
2677     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2678     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2679     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2680     for (i=0; i<a->B->n; i++) {
2681       colors[i] = coloring->colors[larray[i]];
2682     }
2683     ierr = PetscFree(larray);CHKERRQ(ierr);
2684     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2685     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2686     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2687   } else {
2688     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2689   }
2690 
2691   PetscFunctionReturn(0);
2692 }
2693 
2694 #if defined(PETSC_HAVE_ADIC)
2695 #undef __FUNCT__
2696 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2697 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2698 {
2699   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2700   PetscErrorCode ierr;
2701 
2702   PetscFunctionBegin;
2703   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2704   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2705   PetscFunctionReturn(0);
2706 }
2707 #endif
2708 
2709 #undef __FUNCT__
2710 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2711 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2712 {
2713   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2714   PetscErrorCode ierr;
2715 
2716   PetscFunctionBegin;
2717   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2718   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 #undef __FUNCT__
2723 #define __FUNCT__ "MatMerge"
2724 /*@C
2725       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2726                  matrices from each processor
2727 
2728     Collective on MPI_Comm
2729 
2730    Input Parameters:
2731 +    comm - the communicator the parallel matrix will live on
2732 .    inmat - the input sequential matrix (one per process)
2733 .    n - number of local columns (or PETSC_DECIDE)
2734 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2735 
2736    Output Parameter:
2737 .    outmat - the parallel matrix generated
2738 
2739     Level: advanced
2740 
2741    Notes: The number of columns of the matrix on EACH processor MUST be the same.
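
   Example usage (a sketch; Aseq stands for a SeqAIJ matrix previously
   assembled on each process, all with the same number of columns):
.vb
      Mat Apar;
      MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Apar);
.ve
   Note that inmat is destroyed by this routine once its entries have been
   copied (see the MatDestroy() call near the end).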
2742 
2743 @*/
2744 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2745 {
2746   PetscErrorCode ierr;
2747   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2748   PetscInt       *indx;
2749   PetscScalar    *values;
2750   PetscMap       columnmap,rowmap;
2751 
2752   PetscFunctionBegin;
2753   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2754   /*
2755   PetscMPIInt       rank;
2756   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2757   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2758   */
2759   if (scall == MAT_INITIAL_MATRIX){
2760     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2761     if (n == PETSC_DECIDE){
2762       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2763       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2764       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2765       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2766       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2767     }
2768 
2769     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2770     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2771     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2772     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2773     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2774 
2775     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2776     for (i=0;i<m;i++) {
2777       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2778       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2779       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2780     }
2781     /* This routine ONLY returns an MPIAIJ type matrix */
2782     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2783     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2784     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2785     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2786 
2787   } else if (scall == MAT_REUSE_MATRIX){
2788     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2789   } else {
2790     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2791   }
2792 
2793   for (i=0;i<m;i++) {
2794     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2795     I    = i + rstart;
2796     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2797     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2798   }
2799   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2800   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2801   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2802 
2803   PetscFunctionReturn(0);
2804 }
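
/*
   Editor's usage sketch (illustrative only, hypothetical 2x4 local size;
   assumes MPI_Comm comm and PetscErrorCode ierr are in scope): each process
   builds a small SeqAIJ matrix with the SAME number of columns, then
   MatMerge() stacks the local rows into one parallel MPIAIJ matrix.
*/
#if 0
{
  Mat         seq,par;
  PetscInt    row,cols[2];
  PetscScalar vals[2] = {1.0,-1.0};

  ierr = MatCreate(PETSC_COMM_SELF,2,4,2,4,&seq);CHKERRQ(ierr);
  ierr = MatSetType(seq,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(seq,2,PETSC_NULL);CHKERRQ(ierr);
  for (row=0; row<2; row++) {
    cols[0] = row; cols[1] = row+2;
    ierr = MatSetValues(seq,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(seq,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatMerge(comm,seq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
  /* seq has been destroyed by MatMerge(); only par is freed by the caller */
  ierr = MatDestroy(par);CHKERRQ(ierr);
}
#endif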
2805 
2806 #undef __FUNCT__
2807 #define __FUNCT__ "MatFileSplit"
2808 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2809 {
2810   PetscErrorCode    ierr;
2811   PetscMPIInt       rank;
2812   PetscInt          m,N,i,rstart,nnz;
2813   size_t            len;
2814   const PetscInt    *indx;
2815   PetscViewer       out;
2816   char              *name;
2817   Mat               B;
2818   const PetscScalar *values;
2819 
2820   PetscFunctionBegin;
2821   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2822   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2823   /* Should this be the type of the diagonal block of A? */
2824   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2825   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2826   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2827   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2828   for (i=0;i<m;i++) {
2829     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2830     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2831     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2832   }
2833   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2834   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2835 
2836   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2837   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2838   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2839   sprintf(name,"%s.%d",outfile,rank);
2840   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2841   ierr = PetscFree(name);CHKERRQ(ierr);
2842   ierr = MatView(B,out);CHKERRQ(ierr);
2843   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2844   ierr = MatDestroy(B);CHKERRQ(ierr);
2845   PetscFunctionReturn(0);
2846 }
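
/*
   Editor's usage sketch (illustrative; assumes a parallel Mat A and ierr in
   scope): MatFileSplit() writes each process's local rows as a separate
   sequential matrix in PETSc binary format, so a run on three processes with
   outfile "A.dat" produces A.dat.0, A.dat.1 and A.dat.2.
*/
#if 0
ierr = MatFileSplit(A,"A.dat");CHKERRQ(ierr);
#endif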
2847 
2848 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2849 #undef __FUNCT__
2850 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2851 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2852 {
2853   PetscErrorCode       ierr;
2854   Mat_Merge_SeqsToMPI  *merge;
2855   PetscObjectContainer container;
2856 
2857   PetscFunctionBegin;
2858   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2859   if (container) {
2860     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2861     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2862     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2863     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2864     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2865     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2866     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2867     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2868     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2869     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2870     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2871     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2872 
2873     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2874     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2875     ierr = PetscFree(merge);CHKERRQ(ierr); /* free merge only here; it is uninitialized when no container is attached */
2876   }
2877 
2878   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2879   PetscFunctionReturn(0);
2880 }
2881 
2882 #include "src/mat/utils/freespace.h"
2883 #include "petscbt.h"
2884 static PetscEvent logkey_seqstompinum = 0;
2885 #undef __FUNCT__
2886 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2887 /*@C
2888       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the
2889                  sequential matrices from each processor
2890 
2891     Collective on MPI_Comm
2892 
2893    Input Parameters:
2894 +    comm - the communicator the parallel matrix will live on
2895 .    seqmat - the input sequential matrix (one per process)
2896 .    m - number of local rows (or PETSC_DECIDE)
2897 .    n - number of local columns (or PETSC_DECIDE)
2898 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2899 
2900    Output Parameter:
2901 .    mpimat - the parallel matrix generated
2902 
2903     Level: advanced
2904 
2905    Notes:
2906      The dimensions of the sequential matrix on each processor MUST be the same.
2907      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2908      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2909 @*/
2910 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2911 {
2912   PetscErrorCode       ierr;
2913   MPI_Comm             comm=mpimat->comm;
2914   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2915   PetscMPIInt          size,rank,taga,*len_s;
2916   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2917   PetscInt             proc,m;
2918   PetscInt             **buf_ri,**buf_rj;
2919   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2920   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2921   MPI_Request          *s_waits,*r_waits;
2922   MPI_Status           *status;
2923   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2924   Mat_Merge_SeqsToMPI  *merge;
2925   PetscObjectContainer container;
2926 
2927   PetscFunctionBegin;
2928   if (!logkey_seqstompinum) {
2929     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2930   }
2931   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2932 
2933   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2934   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2935 
2936   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2937   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must have been created by MatMerge_SeqsToMPISymbolic()");
2938   ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2939 
2940   bi     = merge->bi;
2941   bj     = merge->bj;
2942   buf_ri = merge->buf_ri;
2943   buf_rj = merge->buf_rj;
2944 
2945   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2946   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2947   len_s  = merge->len_s;
2948 
2949   /* send and recv matrix values */
2950   /*-----------------------------*/
2951   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2952   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2953 
2954   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2955   for (proc=0,k=0; proc<size; proc++){
2956     if (!len_s[proc]) continue;
2957     i = owners[proc];
2958     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2959     k++;
2960   }
2961 
2962   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2963   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2964   ierr = PetscFree(status);CHKERRQ(ierr);
2965 
2966   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2967   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2968 
2969   /* insert mat values of mpimat */
2970   /*----------------------------*/
2971   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2972   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2973   nextrow = buf_ri_k + merge->nrecv;
2974   nextai  = nextrow + merge->nrecv;
2975 
2976   for (k=0; k<merge->nrecv; k++){
2977     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2978     nrows = *(buf_ri_k[k]);
2979     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2980     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure offsets of the k-th received i-structure */
2981   }
2982 
2983   /* set values of ba */
2984   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2985   for (i=0; i<m; i++) {
2986     arow = owners[rank] + i;
2987     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2988     bnzi = bi[i+1] - bi[i];
2989     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2990 
2991     /* add local non-zero vals of this proc's seqmat into ba */
2992     anzi = ai[arow+1] - ai[arow];
2993     aj   = a->j + ai[arow];
2994     aa   = a->a + ai[arow];
2995     nextaj = 0;
2996     for (j=0; nextaj<anzi; j++){
2997       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2998         ba_i[j] += aa[nextaj++];
2999       }
3000     }
3001 
3002     /* add received vals into ba */
3003     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3004       /* i-th row */
3005       if (i == *nextrow[k]) {
3006         anzi = *(nextai[k]+1) - *nextai[k];
3007         aj   = buf_rj[k] + *(nextai[k]);
3008         aa   = abuf_r[k] + *(nextai[k]);
3009         nextaj = 0;
3010         for (j=0; nextaj<anzi; j++){
3011           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
3012             ba_i[j] += aa[nextaj++];
3013           }
3014         }
3015         nextrow[k]++; nextai[k]++;
3016       }
3017     }
3018     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3019   }
3020   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3021   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3022 
3023   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3024   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3025   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3026   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3027   PetscFunctionReturn(0);
3028 }
3029 
3030 static PetscEvent logkey_seqstompisym = 0;
3031 #undef __FUNCT__
3032 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3033 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3034 {
3035   PetscErrorCode       ierr;
3036   Mat                  B_mpi;
3037   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3038   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3039   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3040   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3041   PetscInt             len,proc,*dnz,*onz;
3042   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3043   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3044   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3045   MPI_Status           *status;
3046   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3047   PetscBT              lnkbt;
3048   Mat_Merge_SeqsToMPI  *merge;
3049   PetscObjectContainer container;
3050 
3051   PetscFunctionBegin;
3052   if (!logkey_seqstompisym) {
3053     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3054   }
3055   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3056 
3057   /* make sure it is a PETSc comm */
3058   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3059   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3060   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3061 
3062   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3063   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3064 
3065   /* determine row ownership */
3066   /*---------------------------------------------------------*/
3067   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3068   if (m == PETSC_DECIDE) {
3069     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3070   } else {
3071     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3072   }
3073   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3074   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3075   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3076 
3077   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3078   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3079 
3080   /* determine the number of messages to send, their lengths */
3081   /*---------------------------------------------------------*/
3082   len_s  = merge->len_s;
3083 
3084   len = 0;  /* length of buf_si[] */
3085   merge->nsend = 0;
3086   for (proc=0; proc<size; proc++){
3087     len_si[proc] = 0;
3088     if (proc == rank){
3089       len_s[proc] = 0;
3090     } else {
3091       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3092       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3093     }
3094     if (len_s[proc]) {
3095       merge->nsend++;
3096       nrows = 0;
3097       for (i=owners[proc]; i<owners[proc+1]; i++){
3098         if (ai[i+1] > ai[i]) nrows++;
3099       }
3100       len_si[proc] = 2*(nrows+1);
3101       len += len_si[proc];
3102     }
3103   }
3104 
3105   /* determine the number and length of messages to receive for ij-structure */
3106   /*-------------------------------------------------------------------------*/
3107   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3108   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3109 
3110   /* post the Irecv of j-structure */
3111   /*-------------------------------*/
3112   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3113   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3114 
3115   /* post the Isend of j-structure */
3116   /*--------------------------------*/
3117   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3118   sj_waits = si_waits + merge->nsend;
3119 
3120   for (proc=0, k=0; proc<size; proc++){
3121     if (!len_s[proc]) continue;
3122     i = owners[proc];
3123     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3124     k++;
3125   }
3126 
3127   /* receives and sends of j-structure are complete */
3128   /*------------------------------------------------*/
3129   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3130   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3131 
3132   /* send and recv i-structure */
3133   /*---------------------------*/
3134   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3135   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3136 
3137   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3138   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3139   for (proc=0,k=0; proc<size; proc++){
3140     if (!len_s[proc]) continue;
3141     /* form outgoing message for i-structure:
3142          buf_si[0]:                 nrows to be sent
3143                [1:nrows]:           row index (local to the receiving processor)
3144                [nrows+1:2*nrows+1]: i-structure index
3145     */
3146     /*-------------------------------------------*/
3147     nrows = len_si[proc]/2 - 1;
3148     buf_si_i    = buf_si + nrows+1;
3149     buf_si[0]   = nrows;
3150     buf_si_i[0] = 0;
3151     nrows = 0;
3152     for (i=owners[proc]; i<owners[proc+1]; i++){
3153       anzi = ai[i+1] - ai[i];
3154       if (anzi) {
3155         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3156         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3157         nrows++;
3158       }
3159     }
3160     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3161     k++;
3162     buf_si += len_si[proc];
3163   }
3164 
3165   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3166   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3167 
3168   ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
3169   for (i=0; i<merge->nrecv; i++){
3170     ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
3171   }
3172 
3173   ierr = PetscFree(len_si);CHKERRQ(ierr);
3174   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3175   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3176   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3177   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3178   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3179   ierr = PetscFree(status);CHKERRQ(ierr);
3180 
3181   /* compute a local seq matrix in each processor */
3182   /*----------------------------------------------*/
3183   /* allocate bi array and free space for accumulating nonzero column info */
3184   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3185   bi[0] = 0;
3186 
3187   /* create and initialize a linked list */
3188   nlnk = N+1;
3189   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3190 
3191   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3192   len  = ai[owners[rank+1]] - ai[owners[rank]];
3194   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3195   current_space = free_space;
3196 
3197   /* determine symbolic info for each local row */
3198   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3199   nextrow = buf_ri_k + merge->nrecv;
3200   nextai  = nextrow + merge->nrecv;
3201   for (k=0; k<merge->nrecv; k++){
3202     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3203     nrows = *buf_ri_k[k];
3204     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3205     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure offsets of the k-th received i-structure */
3206   }
3207 
3208   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3209   len = 0;
3210   for (i=0;i<m;i++) {
3211     bnzi   = 0;
3212     /* add local non-zero cols of this proc's seqmat into lnk */
3213     arow   = owners[rank] + i;
3214     anzi   = ai[arow+1] - ai[arow];
3215     aj     = a->j + ai[arow];
3216     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3217     bnzi += nlnk;
3218     /* add received col data into lnk */
3219     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3220       if (i == *nextrow[k]) { /* i-th row */
3221         anzi = *(nextai[k]+1) - *nextai[k];
3222         aj   = buf_rj[k] + *nextai[k];
3223         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3224         bnzi += nlnk;
3225         nextrow[k]++; nextai[k]++;
3226       }
3227     }
3228     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3229 
3230     /* if free space is not available, make more free space */
3231     if (current_space->local_remaining<bnzi) {
3232       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3233       nspacedouble++;
3234     }
3235     /* copy data into free space, then initialize lnk */
3236     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3237     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3238 
3239     current_space->array           += bnzi;
3240     current_space->local_used      += bnzi;
3241     current_space->local_remaining -= bnzi;
3242 
3243     bi[i+1] = bi[i] + bnzi;
3244   }
3245 
3246   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3247 
3248   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3249   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3250   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3251 
3252   /* create symbolic parallel matrix B_mpi */
3253   /*---------------------------------------*/
3254   if (n==PETSC_DECIDE) {
3255     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3256   } else {
3257     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3258   }
3259   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3260   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3261   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3262 
3263   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3264   B_mpi->assembled     = PETSC_FALSE;
3265   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3266   merge->bi            = bi;
3267   merge->bj            = bj;
3268   merge->buf_ri        = buf_ri;
3269   merge->buf_rj        = buf_rj;
3270   merge->coi           = PETSC_NULL;
3271   merge->coj           = PETSC_NULL;
3272   merge->owners_co     = PETSC_NULL;
3273 
3274   /* attach the supporting struct to B_mpi for reuse */
3275   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3276   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3277   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3278   *mpimat = B_mpi;
3279 
3280   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3281   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3282   PetscFunctionReturn(0);
3283 }
3284 
3285 static PetscEvent logkey_seqstompi = 0;
3286 #undef __FUNCT__
3287 #define __FUNCT__ "MatMerge_SeqsToMPI"
3288 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3289 {
3290   PetscErrorCode   ierr;
3291 
3292   PetscFunctionBegin;
3293   if (!logkey_seqstompi) {
3294     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3295   }
3296   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3297   if (scall == MAT_INITIAL_MATRIX){
3298     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3299   }
3300   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3301   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3302   PetscFunctionReturn(0);
3303 }
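
/*
   Editor's usage sketch (illustrative; assumes comm, seqmat, mpimat and ierr
   in scope): the symbolic/numeric split above lets the communication pattern
   and ij-structure be reused when the same merge is repeated with new
   numerical values, e.g. inside a nonlinear iteration.
*/
#if 0
ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
/* ... change the entries of seqmat, keeping its nonzero pattern ... */
ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
#endif
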
3304 static PetscEvent logkey_getlocalmat = 0;
3305 #undef __FUNCT__
3306 #define __FUNCT__ "MatGetLocalMat"
3307 /*@C
3308      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A, with column indices in A's global numbering
3309 
3310     Not Collective
3311 
3312    Input Parameters:
3313 +    A - the matrix
3314 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3315 
3316    Output Parameter:
3317 .    A_loc - the local sequential matrix generated
3318 
3319     Level: developer
3320 
3321 @*/
3322 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3323 {
3324   PetscErrorCode  ierr;
3325   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3326   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3327   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3328   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3329   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3330   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3331 
3332   PetscFunctionBegin;
3333   if (!logkey_getlocalmat) {
3334     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3335   }
3336   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3337   if (scall == MAT_INITIAL_MATRIX){
3338     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3339     ci[0] = 0;
3340     for (i=0; i<am; i++){
3341       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3342     }
3343     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3344     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3345     k = 0;
3346     for (i=0; i<am; i++) {
3347       ncols_o = bi[i+1] - bi[i];
3348       ncols_d = ai[i+1] - ai[i];
3349       /* off-diagonal portion of A */
3350       for (jo=0; jo<ncols_o; jo++) {
3351         col = cmap[*bj];
3352         if (col >= cstart) break;
3353         cj[k]   = col; bj++;
3354         ca[k++] = *ba++;
3355       }
3356       /* diagonal portion of A */
3357       for (j=0; j<ncols_d; j++) {
3358         cj[k]   = cstart + *aj++;
3359         ca[k++] = *aa++;
3360       }
3361       /* off-diagonal portion of A */
3362       for (j=jo; j<ncols_o; j++) {
3363         cj[k]   = cmap[*bj++];
3364         ca[k++] = *ba++;
3365       }
3366     }
3367     /* put together the new matrix */
3368     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3369     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3370     /* Since these are PETSc arrays, change flags to free them as necessary. */
3371     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3372     mat->freedata = PETSC_TRUE;
3373     mat->nonew    = 0;
3374   } else if (scall == MAT_REUSE_MATRIX){
3375     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3376     ci = mat->i; cj = mat->j; ca = mat->a;
3377     for (i=0; i<am; i++) {
3378       /* off-diagonal portion of A */
3379       ncols_o = bi[i+1] - bi[i];
3380       for (jo=0; jo<ncols_o; jo++) {
3381         col = cmap[*bj];
3382         if (col >= cstart) break;
3383         *ca++ = *ba++; bj++;
3384       }
3385       /* diagonal portion of A */
3386       ncols_d = ai[i+1] - ai[i];
3387       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3388       /* off-diagonal portion of A */
3389       for (j=jo; j<ncols_o; j++) {
3390         *ca++ = *ba++; bj++;
3391       }
3392     }
3393   } else {
3394     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3395   }
3396 
3397   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3398   PetscFunctionReturn(0);
3399 }
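
/*
   Editor's usage sketch (illustrative; assumes a parallel Mat A and ierr in
   scope): extract the local rows of A as one sequential matrix with global
   column indices; a MAT_REUSE_MATRIX call refreshes the values in place when
   A's entries change but its nonzero pattern does not.
*/
#if 0
Mat A_loc;
ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
/* ... the values of A change ... */
ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
ierr = MatDestroy(A_loc);CHKERRQ(ierr);
#endif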
3400 
3401 static PetscEvent logkey_getlocalmatcondensed = 0;
3402 #undef __FUNCT__
3403 #define __FUNCT__ "MatGetLocalMatCondensed"
3404 /*@C
3405      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows of A, keeping only its NON-ZERO columns
3406 
3407     Not Collective
3408 
3409    Input Parameters:
3410 +    A - the matrix
3411 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3412 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3413 
3414    Output Parameter:
3415 .    A_loc - the local sequential matrix generated
3416 
3417     Level: developer
3418 
3419 @*/
3420 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3421 {
3422   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3423   PetscErrorCode    ierr;
3424   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3425   IS                isrowa,iscola;
3426   Mat               *aloc;
3427 
3428   PetscFunctionBegin;
3429   if (!logkey_getlocalmatcondensed) {
3430     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3431   }
3432   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3433   if (!row){
3434     start = a->rstart; end = a->rend;
3435     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3436   } else {
3437     isrowa = *row;
3438   }
3439   if (!col){
3440     start = a->cstart;
3441     cmap  = a->garray;
3442     nzA   = a->A->n;
3443     nzB   = a->B->n;
3444     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3445     ncols = 0;
3446     for (i=0; i<nzB; i++) {
3447       if (cmap[i] < start) idx[ncols++] = cmap[i];
3448       else break;
3449     }
3450     imark = i;
3451     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3452     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3453     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3454     ierr = PetscFree(idx);CHKERRQ(ierr);
3455   } else {
3456     iscola = *col;
3457   }
3458   if (scall != MAT_INITIAL_MATRIX){
3459     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3460     aloc[0] = *A_loc;
3461   }
3462   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3463   *A_loc = aloc[0];
3464   ierr = PetscFree(aloc);CHKERRQ(ierr);
3465   if (!row){
3466     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3467   }
3468   if (!col){
3469     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3470   }
3471   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3472   PetscFunctionReturn(0);
3473 }
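
/*
   Editor's usage sketch (illustrative; assumes a parallel Mat A and ierr in
   scope): passing PETSC_NULL for row and col selects all local rows and
   exactly the columns in which the local part of A has nonzeros, so the
   condensed matrix contains no empty columns.
*/
#if 0
Mat A_loc;
ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
ierr = MatDestroy(A_loc);CHKERRQ(ierr);
#endif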
3474 
3475 static PetscEvent logkey_GetBrowsOfAcols = 0;
3476 #undef __FUNCT__
3477 #define __FUNCT__ "MatGetBrowsOfAcols"
3478 /*@C
3479     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
3480 
3481     Collective on Mat
3482 
3483    Input Parameters:
3484 +    A,B - the matrices in mpiaij format
3485 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3486 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3487 
3488    Output Parameter:
3489 +    rowb, colb - index sets of rows and columns of B to extract
3490 .    brstart - the first row of B_seq that is taken from B's local rows
3491 -    B_seq - the sequential matrix generated
3492 
3493     Level: developer
3494 
3495 @*/
3496 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3497 {
3498   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3499   PetscErrorCode    ierr;
3500   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3501   IS                isrowb,iscolb;
3502   Mat               *bseq;
3503 
3504   PetscFunctionBegin;
3505   if (a->cstart != b->rstart || a->cend != b->rend){
3506     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3507   }
3508   if (!logkey_GetBrowsOfAcols) {
3509     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3510   }
3511   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3512 
3513   if (scall == MAT_INITIAL_MATRIX){
3514     start = a->cstart;
3515     cmap  = a->garray;
3516     nzA   = a->A->n;
3517     nzB   = a->B->n;
3518     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3519     ncols = 0;
3520     for (i=0; i<nzB; i++) {  /* row < local row index */
3521       if (cmap[i] < start) idx[ncols++] = cmap[i];
3522       else break;
3523     }
3524     imark = i;
3525     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3526     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3527     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3528     ierr = PetscFree(idx);CHKERRQ(ierr);
3529     *brstart = imark;
3530     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3531   } else {
3532     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3533     isrowb = *rowb; iscolb = *colb;
3534     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3535     bseq[0] = *B_seq;
3536   }
3537   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3538   *B_seq = bseq[0];
3539   ierr = PetscFree(bseq);CHKERRQ(ierr);
3540   if (!rowb){
3541     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3542   } else {
3543     *rowb = isrowb;
3544   }
3545   if (!colb){
3546     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3547   } else {
3548     *colb = iscolb;
3549   }
3550   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3551   PetscFunctionReturn(0);
3552 }
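
/*
   Editor's usage sketch (illustrative; assumes parallel Mats A,B and ierr in
   scope): gather the rows of B that match the nonzero columns of the local
   part of A; rowb, colb and brstart are handed back so a later
   MAT_REUSE_MATRIX call can skip rebuilding the index sets.
*/
#if 0
IS       rowb = PETSC_NULL,colb = PETSC_NULL;
PetscInt brstart;
Mat      B_seq;
ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
/* ... after the values of B change ... */
ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
ierr = ISDestroy(rowb);CHKERRQ(ierr);
ierr = ISDestroy(colb);CHKERRQ(ierr);
ierr = MatDestroy(B_seq);CHKERRQ(ierr);
#endif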
3553 
3554 static PetscEvent logkey_GetBrowsOfAocols = 0;
3555 #undef __FUNCT__
3556 #define __FUNCT__ "MatGetBrowsOfAoCols"
3557 /*@C
3558     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to
3559     the nonzero columns of the OFF-DIAGONAL portion of local A
3560 
3561     Collective on Mat
3562 
3563    Input Parameters:
3564 +    A,B - the matrices in mpiaij format
3565 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3566 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3567 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3568 
3569    Output Parameter:
3570 .    B_oth - the sequential matrix generated
3571 
3572     Level: developer
3573 
3574 @*/
3575 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3576 {
3577   VecScatter_MPI_General *gen_to,*gen_from;
3578   PetscErrorCode         ierr;
3579   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3580   Mat_SeqAIJ             *b_oth;
3581   VecScatter             ctx=a->Mvctx;
3582   MPI_Comm               comm=ctx->comm;
3583   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3584   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3585   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3586   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3587   MPI_Request            *rwaits,*swaits;
3588   MPI_Status             *sstatus,rstatus;
3589   PetscInt               *cols;
3590   PetscScalar            *vals;
3591   PetscMPIInt            j;
3592 
3593   PetscFunctionBegin;
3594   if (a->cstart != b->rstart || a->cend != b->rend){
3595     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3596   }
3597   if (!logkey_GetBrowsOfAocols) {
3598     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3599   }
3600   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3601   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3602 
3603   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3604   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3605   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3606   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3607   nrecvs   = gen_from->n;
3608   nsends   = gen_to->n;
3609   rwaits   = gen_from->requests;
3610   swaits   = gen_to->requests;
3611   srow     = gen_to->indices;   /* local row index to be sent */
3612   rstarts  = gen_from->starts;
3613   sstarts  = gen_to->starts;
3614   rprocs   = gen_from->procs;
3615   sprocs   = gen_to->procs;
3616   sstatus  = gen_to->sstatus;
3617 
3618   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3619   if (scall == MAT_INITIAL_MATRIX){
3620     /* i-array */
3621     /*---------*/
3622     /*  post receives */
3623     for (i=0; i<nrecvs; i++){
3624       rowlen = (PetscInt*)rvalues + rstarts[i];
3625       nrows = rstarts[i+1]-rstarts[i];
3626       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3627     }
3628 
3629     /* pack the outgoing message */
3630     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3631     rstartsj = sstartsj + nsends +1;
3632     sstartsj[0] = 0;  rstartsj[0] = 0;
3633     len = 0; /* total length of j or a array to be sent */
3634     k = 0;
3635     for (i=0; i<nsends; i++){
3636       rowlen = (PetscInt*)svalues + sstarts[i];
3637       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3638       for (j=0; j<nrows; j++) {
3639         row = srow[k] + b->rowners[rank]; /* global row idx */
3640         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3641         len += rowlen[j];
3642         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3643         k++;
3644       }
3645       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3646       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3647     }
3648     /* recvs and sends of i-array are completed */
3649     i = nrecvs;
3650     while (i--) {
3651       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3652     }
3653     if (nsends) {
3654       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3655     }
3656     /* allocate buffers for sending j and a arrays */
3657     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3658     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3659 
3660     /* create i-array of B_oth */
3661     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3662     b_othi[0] = 0;
3663     len = 0; /* total length of j or a array to be received */
3664     k = 0;
3665     for (i=0; i<nrecvs; i++){
3666       rowlen = (PetscInt*)rvalues + rstarts[i];
3667       nrows = rstarts[i+1]-rstarts[i];
3668       for (j=0; j<nrows; j++) {
3669         b_othi[k+1] = b_othi[k] + rowlen[j];
3670         len += rowlen[j]; k++;
3671       }
3672       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3673     }
3674 
3675     /* allocate space for j and a arrays of B_oth */
3676     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3677     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3678 
3679     /* j-array */
3680     /*---------*/
3681     /*  post receives of j-array */
3682     for (i=0; i<nrecvs; i++){
3683       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3684       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3685     }
3686     k = 0;
3687     for (i=0; i<nsends; i++){
3688       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3689       bufJ = bufj+sstartsj[i];
3690       for (j=0; j<nrows; j++) {
3691         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3692         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3693         for (l=0; l<ncols; l++){
3694           *bufJ++ = cols[l];
3695         }
3696         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3697       }
3698       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3699     }
3700 
3701     /* recvs and sends of j-array are completed */
3702     i = nrecvs;
3703     while (i--) {
3704       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3705     }
3706     if (nsends) {
3707       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3708     }
3709   } else if (scall == MAT_REUSE_MATRIX){
3710     sstartsj = *startsj;
3711     rstartsj = sstartsj + nsends +1;
3712     bufa     = *bufa_ptr;
3713     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3714     b_otha   = b_oth->a;
3715   } else {
3716     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3717   }
3718 
3719   /* a-array */
3720   /*---------*/
3721   /*  post receives of a-array */
3722   for (i=0; i<nrecvs; i++){
3723     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3724     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3725   }
3726   k = 0;
3727   for (i=0; i<nsends; i++){
3728     nrows = sstarts[i+1]-sstarts[i];
3729     bufA = bufa+sstartsj[i];
3730     for (j=0; j<nrows; j++) {
3731       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3732       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3733       for (l=0; l<ncols; l++){
3734         *bufA++ = vals[l];
3735       }
3736       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3737 
3738     }
3739     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3740   }
3741   /* recvs and sends of a-array are completed */
3742   i = nrecvs;
3743   while (i--) {
3744     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3745   }
3746   if (nsends) {
3747     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3748   }
3749 
3750   if (scall == MAT_INITIAL_MATRIX){
3751     /* put together the new matrix */
3752     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3753 
3754     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3755     /* Since these are PETSc arrays, change flags to free them as necessary. */
3756     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3757     b_oth->freedata = PETSC_TRUE;
3758     b_oth->nonew    = 0;
3759 
3760     ierr = PetscFree(bufj);CHKERRQ(ierr);
3761     if (!startsj || !bufa_ptr){
3762       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3763       ierr = PetscFree(bufa);CHKERRQ(ierr);
3764     } else {
3765       *startsj  = sstartsj;
3766       *bufa_ptr = bufa;
3767     }
3768   }
3769   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3770 
3771   PetscFunctionReturn(0);
3772 }
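
/*
   Editor's usage sketch (illustrative; assumes parallel Mats A,B and ierr in
   scope): the same idea as MatGetBrowsOfAcols() but restricted to the
   OFF-DIAGONAL columns of A; startsj and bufa are returned so a
   MAT_REUSE_MATRIX call can reuse the message buffers.
*/
#if 0
PetscInt    *startsj = PETSC_NULL;
PetscScalar *bufa = PETSC_NULL;
Mat         B_oth;
ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
/* ... after the values of B change ... */
ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
ierr = PetscFree(startsj);CHKERRQ(ierr);
ierr = PetscFree(bufa);CHKERRQ(ierr);
ierr = MatDestroy(B_oth);CHKERRQ(ierr);
#endif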
3773 
3774 /*MC
3775    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3776 
3777    Options Database Keys:
3778 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3779 
3780   Level: beginner
3781 
3782 .seealso: MatCreateMPIAIJ
3783 M*/
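
/*
   Editor's usage sketch (illustrative; assumes comm, global sizes M,N, a Mat A
   and ierr in scope): select the type from the options database with
   -mat_type mpiaij, or fix it in code; the per-row preallocation counts (5
   diagonal, 2 off-diagonal) are hypothetical.
*/
#if 0
ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
#endif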
3784 
3785 EXTERN_C_BEGIN
3786 #undef __FUNCT__
3787 #define __FUNCT__ "MatCreate_MPIAIJ"
3788 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3789 {
3790   Mat_MPIAIJ     *b;
3791   PetscErrorCode ierr;
3792   PetscInt       i;
3793   PetscMPIInt    size;
3794 
3795   PetscFunctionBegin;
3796   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3797 
3798   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3799   B->data         = (void*)b;
3800   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3801   B->factor       = 0;
3802   B->bs           = 1;
3803   B->assembled    = PETSC_FALSE;
3804   B->mapping      = 0;
3805 
3806   B->insertmode      = NOT_SET_VALUES;
3807   b->size            = size;
3808   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3809 
3810   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3811   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3812 
3813   /* the information in the maps duplicates the information computed below, eventually
3814      we should remove the duplicate information that is not contained in the maps */
3815   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3816   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3817 
3818   /* build local table of row and column ownerships */
3819   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3820   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3821   b->cowners = b->rowners + b->size + 2;
3822   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3823   b->rowners[0] = 0;
3824   for (i=2; i<=b->size; i++) {
3825     b->rowners[i] += b->rowners[i-1];
3826   }
3827   b->rstart = b->rowners[b->rank];
3828   b->rend   = b->rowners[b->rank+1];
3829   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3830   b->cowners[0] = 0;
3831   for (i=2; i<=b->size; i++) {
3832     b->cowners[i] += b->cowners[i-1];
3833   }
3834   b->cstart = b->cowners[b->rank];
3835   b->cend   = b->cowners[b->rank+1];
3836 
3837   /* build cache for off array entries formed */
3838   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3839   b->donotstash  = PETSC_FALSE;
3840   b->colmap      = 0;
3841   b->garray      = 0;
3842   b->roworiented = PETSC_TRUE;
3843 
3844   /* stuff used for matrix vector multiply */
3845   b->lvec      = PETSC_NULL;
3846   b->Mvctx     = PETSC_NULL;
3847 
3848   /* stuff for MatGetRow() */
3849   b->rowindices   = 0;
3850   b->rowvalues    = 0;
3851   b->getrowactive = PETSC_FALSE;
3852 
3853   /* Explicitly create 2 MATSEQAIJ matrices. */
3854   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3855   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3856   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3857   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3858   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3859   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3860 
3861   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3862                                      "MatStoreValues_MPIAIJ",
3863                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3864   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3865                                      "MatRetrieveValues_MPIAIJ",
3866                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3867   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3868 				     "MatGetDiagonalBlock_MPIAIJ",
3869                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3870   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3871 				     "MatIsTranspose_MPIAIJ",
3872 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3873   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3874 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3875 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3876   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3877 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3878 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3879   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3880 				     "MatDiagonalScaleLocal_MPIAIJ",
3881 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3882   PetscFunctionReturn(0);
3883 }
3884 EXTERN_C_END
3885