xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6ab11f9b6ca3a3a4762ba278e5cdc46bee711a5b)
#define PETSCMAT_DLL

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/inline/spops.h"

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table cost; without it, it is not scalable (each
  process stores an order-N integer array) but access is fast.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->n,i;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
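
/*
  Illustrative example (not from the original source): if the off-diagonal
  block references global columns garray = {3,7,12}, then with PETSC_USE_CTABLE
  the table maps 4->1, 8->2 and 13->3 (keys and values are shifted by one so a
  lookup result of 0 can mean "not found"); without it, colmap[3] = 1,
  colmap[7] = 2, colmap[12] = 3 and every other entry is 0.
*/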

#define CHUNKSIZE   15
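
/*
  MatSetValues_SeqAIJ_A_Private inserts (or, for ADD_VALUES, adds) value at
  (row,col) of the diagonal block a.  It assumes the caller has set up rp1
  (the column indices of the row), ap1 (the values), nrow1, rmax1, low1/high1
  and lastcol1, as done in MatSetValues_MPIAIJ() below: a bisection step
  narrows the search range, a linear scan locates the column, and if the
  entry is new and the row is full the row is enlarged by CHUNKSIZE slots.
*/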
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else                    ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow1 >= rmax1) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      ierr = PetscMalloc3(new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,am+1,PetscInt,&new_i);CHKERRQ(ierr); \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow1)*sizeof(PetscInt));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - ai[row] - nrow1); \
      ierr = PetscMemcpy(new_j+ai[row]+nrow1+CHUNKSIZE,aj+ai[row]+nrow1,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow1)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+nrow1+CHUNKSIZE,aa+ai[row]+nrow1,len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr); \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp1   = aj + ai[row]; ap1 = aa + ai[row]; \
      rmax1 = aimax[row] = aimax[row] + CHUNKSIZE; \
      ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow1++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

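/*
  MatSetValues_SeqAIJ_B_Private is the same as the macro above except that it
  operates on the off-diagonal block b (rp2/ap2/nrow2/... instead of
  rp1/ap1/nrow1/...).
*/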
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
    if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else              low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else                    ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    if (nrow2 >= rmax2) { \
      /* there is no extra room in row, therefore enlarge */ \
      PetscInt    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
      /* malloc new storage space */ \
      ierr = PetscMalloc3(new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,bm+1,PetscInt,&new_i);CHKERRQ(ierr); \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow2)*sizeof(PetscInt));CHKERRQ(ierr); \
      len = (new_nz - CHUNKSIZE - bi[row] - nrow2); \
      ierr = PetscMemcpy(new_j+bi[row]+nrow2+CHUNKSIZE,bj+bi[row]+nrow2,len*sizeof(PetscInt));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow2)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+nrow2+CHUNKSIZE,ba+bi[row]+nrow2,len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
      ierr = MatSeqXAIJFreeAIJ(b->singlemalloc,b->a,b->j,b->i);CHKERRQ(ierr); \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp2   = bj + bi[row]; ap2 = ba + bi[row]; \
      rmax2 = bimax[row] = bimax[row] + CHUNKSIZE; \
      ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + sizeof(PetscScalar)));CHKERRQ(ierr); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow2++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

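/*
  MatSetValues for MPIAIJ matrices: entries in locally owned rows go either
  into the diagonal block A (global columns in [cstart,cend)) or into the
  off-diagonal block B (all other columns, translated to local numbering via
  colmap/garray once the matrix has been assembled).  Entries belonging to
  rows owned by other processes are placed in the stash and communicated by
  MatAssemblyBegin()/MatAssemblyEnd().
*/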
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth     roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat            A = aij->A;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
  PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar    *aa = a->a;
  PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat            B = aij->B;
  Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
  PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar    *ba = b->a;

  PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt       nonew = a->nonew;
  PetscScalar    *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2      = bj + bi[row];
              ap2      = ba + bi[row];
              rmax2    = bimax[row];
              nrow2    = bilen[row];
              low2     = 0;
              high2    = nrow2;
            }
          } else col = in[j];
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
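
/*
  A minimal usage sketch (illustrative only; variable setup assumed, not part
  of this file): each process normally sets entries of the rows it owns, and
  any off-process entries are stashed above and exchanged during assembly.

      PetscInt    row = rstart, cols[2];
      PetscScalar vals[2];
      cols[0] = row; cols[1] = row+1; vals[0] = 2.0; vals[1] = -1.0;
      ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/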

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
  PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processes are in either INSERT_VALUES or ADD_VALUES mode;
     a bitwise OR across processes detects a mix of the two */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ *)aij->A->data,*b = (Mat_SeqAIJ *)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col,other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
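
  /* the diagonal block A keeps standard (uncompressed) row storage; the
     off-diagonal block B is switched to compressed-row storage below */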
  a->compressedrow.use = PETSC_FALSE;
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  /* refetch b because aij->B may have been changed in MatSetValues() or DisAssemble() */
  b    = (Mat_SeqAIJ *)aij->B->data;

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }

  /* used by MatAXPY() */
  a->xtoy = 0; b->xtoy = 0;
  a->XtoY = 0; b->XtoY = 0;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
  PetscInt       i,N,*rows,*owners = l->rowners;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,count,base,slen,*source;
  PetscInt       *lens,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
#if defined(PETSC_USE_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
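  /*
     Overall flow: (1) count how many of the requested rows belong to each
     process, (2) exchange message counts and the maximum message length via
     PetscMaxSum(), (3) ship the row indices with nonblocking sends/receives,
     (4) gather the locally owned rows into lrows[], and (5) zero them with
     MatZeroRows() on the diagonal and off-diagonal blocks.
  */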
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_USE_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_USE_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry
     (which would allocate new memory, causing many mallocs and slowing
     things down).

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
  }
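  /* overlap the scatter of the needed off-process entries of xx with the
     local (diagonal-block) product, then add in the off-diagonal part */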
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       added into yy until the VecScatterEnd() below */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatIsTranspose_MPIAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;

  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This requires a MatGetSubMatrices() call. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(Me);CHKERRQ(ierr);
  ierr = ISDestroy(Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
  int               fd;
  PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
  PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (!rank) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the neediest processor */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
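  /* merge each row into ascending global column order: off-diagonal (B)
     columns that lie below the diagonal block, then the diagonal-block (A)
     columns, then the remaining off-diagonal columns */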
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo    info;
      PetscTruth inodes;

      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

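/*
  Only the "local" SOR variants are supported in parallel: each outer
  iteration scatters the current iterate, folds the off-diagonal contribution
  into the right-hand side (bb1 = bb - B*x), and then relaxes with the
  diagonal block alone.
*/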
#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1;
  PetscScalar    mone = -1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  Mat            A = mat->A,B = mat->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->n;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (!aij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatNorm_MPIAIJ"
1330 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1331 {
1332   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1333   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1334   PetscErrorCode ierr;
1335   PetscInt       i,j,cstart = aij->cstart;
1336   PetscReal      sum = 0.0;
1337   PetscScalar    *v;
1338 
1339   PetscFunctionBegin;
1340   if (aij->size == 1) {
1341     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1342   } else {
1343     if (type == NORM_FROBENIUS) {
1344       v = amat->a;
1345       for (i=0; i<amat->nz; i++) {
1346 #if defined(PETSC_USE_COMPLEX)
1347         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1348 #else
1349         sum += (*v)*(*v); v++;
1350 #endif
1351       }
1352       v = bmat->a;
1353       for (i=0; i<bmat->nz; i++) {
1354 #if defined(PETSC_USE_COMPLEX)
1355         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1356 #else
1357         sum += (*v)*(*v); v++;
1358 #endif
1359       }
1360       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1361       *norm = sqrt(*norm);
1362     } else if (type == NORM_1) { /* max column norm */
1363       PetscReal *tmp,*tmp2;
1364       PetscInt    *jj,*garray = aij->garray;
1365       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1366       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1367       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1368       *norm = 0.0;
1369       v = amat->a; jj = amat->j;
1370       for (j=0; j<amat->nz; j++) {
1371         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1372       }
1373       v = bmat->a; jj = bmat->j;
1374       for (j=0; j<bmat->nz; j++) {
1375         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1376       }
1377       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1378       for (j=0; j<mat->N; j++) {
1379         if (tmp2[j] > *norm) *norm = tmp2[j];
1380       }
1381       ierr = PetscFree(tmp);CHKERRQ(ierr);
1382       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1383     } else if (type == NORM_INFINITY) { /* max row norm */
1384       PetscReal ntemp = 0.0;
1385       for (j=0; j<aij->A->m; j++) {
1386         v = amat->a + amat->i[j];
1387         sum = 0.0;
1388         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1389           sum += PetscAbsScalar(*v); v++;
1390         }
1391         v = bmat->a + bmat->i[j];
1392         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1393           sum += PetscAbsScalar(*v); v++;
1394         }
1395         if (sum > ntemp) ntemp = sum;
1396       }
1397       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1398     } else {
1399       SETERRQ(PETSC_ERR_SUP,"No support for the two norm");
1400     }
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
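/*
   For reference (a sketch, not compiled), the cases above compute:
     NORM_FROBENIUS  sqrt( sum_ij |a_ij|^2 ), local sums combined with MPI_SUM
     NORM_1          max_j sum_i |a_ij|, per-column sums reduced with MPI_SUM, then the max
     NORM_INFINITY   max_i sum_j |a_ij|, local row sums reduced with MPI_MAX
   A caller would simply use, e.g.:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/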
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatTranspose_MPIAIJ"
1407 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1408 {
1409   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1410   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1411   PetscErrorCode ierr;
1412   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1413   Mat            B;
1414   PetscScalar    *array;
1415 
1416   PetscFunctionBegin;
1417   if (!matout && M != N) {
1418     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1419   }
1420 
1421   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1422   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1423   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1424 
1425   /* copy over the A part */
1426   Aloc = (Mat_SeqAIJ*)a->A->data;
1427   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1428   row = a->rstart;
1429   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1430   for (i=0; i<m; i++) {
1431     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1432     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1433   }
1434   aj = Aloc->j;
1435   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1436 
1437   /* copy over the B part */
1438   Aloc = (Mat_SeqAIJ*)a->B->data;
1439   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1440   row  = a->rstart;
1441   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1442   ct   = cols;
1443   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1444   for (i=0; i<m; i++) {
1445     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1446     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1447   }
1448   ierr = PetscFree(ct);CHKERRQ(ierr);
1449   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1450   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1451   if (matout) {
1452     *matout = B;
1453   } else {
1454     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1455   }
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1461 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1462 {
1463   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1464   Mat            a = aij->A,b = aij->B;
1465   PetscErrorCode ierr;
1466   PetscInt       s1,s2,s3;
1467 
1468   PetscFunctionBegin;
1469   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1470   if (rr) {
1471     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1472     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1473     /* Overlap communication with computation. */
1474     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1475   }
1476   if (ll) {
1477     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1478     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1479     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1480   }
1481   /* scale the diagonal block */
1482   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1483 
1484   if (rr) {
1485     /* Do a scatter end and then right scale the off-diagonal block */
1486     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1487     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1488   }
1489 
1490   PetscFunctionReturn(0);
1491 }
1492 
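/*
   Illustrative usage (not part of this file): MatDiagonalScale(mat,ll,rr)
   computes mat = diag(ll)*mat*diag(rr); either vector may be PETSC_NULL.
   ll must have the matrix's local row layout and rr its local column layout:

     ierr = MatDiagonalScale(mat,ll,rr);CHKERRQ(ierr);

   The implementation above hides the scatter of rr into the ghosted vector
   lvec behind the scaling of the diagonal block.
*/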
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1496 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1497 {
1498   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1499   PetscErrorCode ierr;
1500 
1501   PetscFunctionBegin;
1502   if (!a->rank) {
1503     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1504   }
1505   PetscFunctionReturn(0);
1506 }
1507 
1508 #undef __FUNCT__
1509 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1510 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1511 {
1512   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1513   PetscErrorCode ierr;
1514 
1515   PetscFunctionBegin;
1516   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1517   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1518   PetscFunctionReturn(0);
1519 }
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1522 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1523 {
1524   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1525   PetscErrorCode ierr;
1526 
1527   PetscFunctionBegin;
1528   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatEqual_MPIAIJ"
1534 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1535 {
1536   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1537   Mat            a,b,c,d;
1538   PetscTruth     flg;
1539   PetscErrorCode ierr;
1540 
1541   PetscFunctionBegin;
1542   a = matA->A; b = matA->B;
1543   c = matB->A; d = matB->B;
1544 
1545   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1546   if (flg) {
1547     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1548   }
1549   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1550   PetscFunctionReturn(0);
1551 }
1552 
1553 #undef __FUNCT__
1554 #define __FUNCT__ "MatCopy_MPIAIJ"
1555 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1556 {
1557   PetscErrorCode ierr;
1558   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1559   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1560 
1561   PetscFunctionBegin;
1562   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1563   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1564     /* because of the column compression in the off-processor part of the matrix a->B,
1565        the number of columns in a->B and b->B may be different, hence we cannot call
1566        MatCopy() directly on the two parts. If need be, we could provide a copy more
1567        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1568        then copying the submatrices */
1569     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1570   } else {
1571     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1572     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1573   }
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1579 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1580 {
1581   PetscErrorCode ierr;
1582 
1583   PetscFunctionBegin;
1584   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 #include "petscblaslapack.h"
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatAXPY_MPIAIJ"
1591 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1592 {
1593   PetscErrorCode ierr;
1594   PetscInt       i;
1595   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1596   PetscBLASInt   bnz,one=1;
1597   Mat_SeqAIJ     *x,*y;
1598 
1599   PetscFunctionBegin;
1600   if (str == SAME_NONZERO_PATTERN) {
1601     x = (Mat_SeqAIJ *)xx->A->data;
1602     y = (Mat_SeqAIJ *)yy->A->data;
1603     bnz = (PetscBLASInt)x->nz;
1604     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1605     x = (Mat_SeqAIJ *)xx->B->data;
1606     y = (Mat_SeqAIJ *)yy->B->data;
1607     bnz = (PetscBLASInt)x->nz;
1608     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1609   } else if (str == SUBSET_NONZERO_PATTERN) {
1610     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1611 
1612     x = (Mat_SeqAIJ *)xx->B->data;
1613     y = (Mat_SeqAIJ *)yy->B->data;
1614     if (y->xtoy && y->XtoY != xx->B) {
1615       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1616       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1617     }
1618     if (!y->xtoy) { /* get xtoy */
1619       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1620       y->XtoY = xx->B;
1621     }
1622     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1623   } else {
1624     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1625   }
1626   PetscFunctionReturn(0);
1627 }
1628 
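/*
   Illustrative usage (a sketch, not compiled): MatAXPY computes Y = a*X + Y,
   and the str argument selects the fast paths above:

     PetscScalar a = 2.0;
     ierr = MatAXPY(&a,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   SAME_NONZERO_PATTERN uses a raw BLAS axpy on the stored values,
   SUBSET_NONZERO_PATTERN maps X's nonzeros into Y through the cached xtoy
   index, and anything else falls back to MatAXPY_Basic().
*/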
1629 /* -------------------------------------------------------------------*/
1630 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1631        MatGetRow_MPIAIJ,
1632        MatRestoreRow_MPIAIJ,
1633        MatMult_MPIAIJ,
1634 /* 4*/ MatMultAdd_MPIAIJ,
1635        MatMultTranspose_MPIAIJ,
1636        MatMultTransposeAdd_MPIAIJ,
1637        0,
1638        0,
1639        0,
1640 /*10*/ 0,
1641        0,
1642        0,
1643        MatRelax_MPIAIJ,
1644        MatTranspose_MPIAIJ,
1645 /*15*/ MatGetInfo_MPIAIJ,
1646        MatEqual_MPIAIJ,
1647        MatGetDiagonal_MPIAIJ,
1648        MatDiagonalScale_MPIAIJ,
1649        MatNorm_MPIAIJ,
1650 /*20*/ MatAssemblyBegin_MPIAIJ,
1651        MatAssemblyEnd_MPIAIJ,
1652        0,
1653        MatSetOption_MPIAIJ,
1654        MatZeroEntries_MPIAIJ,
1655 /*25*/ MatZeroRows_MPIAIJ,
1656        0,
1657        0,
1658        0,
1659        0,
1660 /*30*/ MatSetUpPreallocation_MPIAIJ,
1661        0,
1662        0,
1663        0,
1664        0,
1665 /*35*/ MatDuplicate_MPIAIJ,
1666        0,
1667        0,
1668        0,
1669        0,
1670 /*40*/ MatAXPY_MPIAIJ,
1671        MatGetSubMatrices_MPIAIJ,
1672        MatIncreaseOverlap_MPIAIJ,
1673        MatGetValues_MPIAIJ,
1674        MatCopy_MPIAIJ,
1675 /*45*/ MatPrintHelp_MPIAIJ,
1676        MatScale_MPIAIJ,
1677        0,
1678        0,
1679        0,
1680 /*50*/ MatSetBlockSize_MPIAIJ,
1681        0,
1682        0,
1683        0,
1684        0,
1685 /*55*/ MatFDColoringCreate_MPIAIJ,
1686        0,
1687        MatSetUnfactored_MPIAIJ,
1688        0,
1689        0,
1690 /*60*/ MatGetSubMatrix_MPIAIJ,
1691        MatDestroy_MPIAIJ,
1692        MatView_MPIAIJ,
1693        MatGetPetscMaps_Petsc,
1694        0,
1695 /*65*/ 0,
1696        0,
1697        0,
1698        0,
1699        0,
1700 /*70*/ 0,
1701        0,
1702        MatSetColoring_MPIAIJ,
1703 #if defined(PETSC_HAVE_ADIC)
1704        MatSetValuesAdic_MPIAIJ,
1705 #else
1706        0,
1707 #endif
1708        MatSetValuesAdifor_MPIAIJ,
1709 /*75*/ 0,
1710        0,
1711        0,
1712        0,
1713        0,
1714 /*80*/ 0,
1715        0,
1716        0,
1717        0,
1718 /*84*/ MatLoad_MPIAIJ,
1719        0,
1720        0,
1721        0,
1722        0,
1723        0,
1724 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1725        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1726        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1727        MatPtAP_Basic,
1728        MatPtAPSymbolic_MPIAIJ,
1729 /*95*/ MatPtAPNumeric_MPIAIJ,
1730        0,
1731        0,
1732        0,
1733        0,
1734 /*100*/0,
1735        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1736        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1737 };
1738 
1739 /* ----------------------------------------------------------------------------------------*/
1740 
1741 EXTERN_C_BEGIN
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1744 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1745 {
1746   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1747   PetscErrorCode ierr;
1748 
1749   PetscFunctionBegin;
1750   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1751   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1752   PetscFunctionReturn(0);
1753 }
1754 EXTERN_C_END
1755 
1756 EXTERN_C_BEGIN
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1759 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1760 {
1761   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1762   PetscErrorCode ierr;
1763 
1764   PetscFunctionBegin;
1765   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1766   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1767   PetscFunctionReturn(0);
1768 }
1769 EXTERN_C_END
1770 
1771 #include "petscpc.h"
1772 EXTERN_C_BEGIN
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1775 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1776 {
1777   Mat_MPIAIJ     *b;
1778   PetscErrorCode ierr;
1779   PetscInt       i;
1780 
1781   PetscFunctionBegin;
1782   B->preallocated = PETSC_TRUE;
1783   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1784   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1785   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1786   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1787   if (d_nnz) {
1788     for (i=0; i<B->m; i++) {
1789       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1790     }
1791   }
1792   if (o_nnz) {
1793     for (i=0; i<B->m; i++) {
1794       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1795     }
1796   }
1797   b = (Mat_MPIAIJ*)B->data;
1798   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1799   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1800 
1801   PetscFunctionReturn(0);
1802 }
1803 EXTERN_C_END
1804 
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1807 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1808 {
1809   Mat            mat;
1810   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1811   PetscErrorCode ierr;
1812 
1813   PetscFunctionBegin;
1814   *newmat       = 0;
1815   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1816   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1817   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1818   a    = (Mat_MPIAIJ*)mat->data;
1819 
1820   mat->factor       = matin->factor;
1821   mat->bs           = matin->bs;
1822   mat->assembled    = PETSC_TRUE;
1823   mat->insertmode   = NOT_SET_VALUES;
1824   mat->preallocated = PETSC_TRUE;
1825 
1826   a->rstart       = oldmat->rstart;
1827   a->rend         = oldmat->rend;
1828   a->cstart       = oldmat->cstart;
1829   a->cend         = oldmat->cend;
1830   a->size         = oldmat->size;
1831   a->rank         = oldmat->rank;
1832   a->donotstash   = oldmat->donotstash;
1833   a->roworiented  = oldmat->roworiented;
1834   a->rowindices   = 0;
1835   a->rowvalues    = 0;
1836   a->getrowactive = PETSC_FALSE;
1837 
1838   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1839   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1840   if (oldmat->colmap) {
1841 #if defined (PETSC_USE_CTABLE)
1842     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1843 #else
1844     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1845     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1846     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1847 #endif
1848   } else a->colmap = 0;
1849   if (oldmat->garray) {
1850     PetscInt len;
1851     len  = oldmat->B->n;
1852     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1853     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1854     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1855   } else a->garray = 0;
1856 
1857   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1858   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1859   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1860   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1861   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1862   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1863   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1864   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1865   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1866   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1867   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1868   *newmat = mat;
1869   PetscFunctionReturn(0);
1870 }
1871 
1872 #include "petscsys.h"
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatLoad_MPIAIJ"
1876 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1877 {
1878   Mat            A;
1879   PetscScalar    *vals,*svals;
1880   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1881   MPI_Status     status;
1882   PetscErrorCode ierr;
1883   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1884   PetscInt       i,nz,j,rstart,rend,mmax;
1885   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1886   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1887   PetscInt       cend,cstart,n,*rowners;
1888   int            fd;
1889 
1890   PetscFunctionBegin;
1891   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1892   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1893   if (!rank) {
1894     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1895     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1896     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1897   }
1898 
1899   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1900   M = header[1]; N = header[2];
1901   /* determine ownership of all rows */
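  /* each process gets M/size rows; the first M%size processes get one extra row */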
1902   m    = M/size + ((M % size) > rank);
1903   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1904   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1905 
1906   /* The first process needs enough room for the process with the most rows */
1907   if (!rank) {
1908     mmax       = rowners[1];
1909     for (i=2; i<size; i++) {
1910       mmax = PetscMax(mmax,rowners[i]);
1911     }
1912   } else mmax = m;
1913 
1914   rowners[0] = 0;
1915   for (i=2; i<=size; i++) {
1916     mmax       = PetscMax(mmax,rowners[i]);
1917     rowners[i] += rowners[i-1];
1918   }
1919   rstart = rowners[rank];
1920   rend   = rowners[rank+1];
1921 
1922   /* distribute row lengths to all processors */
1923   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1924   if (!rank) {
1925     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1926     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1927     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1928     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1929     for (j=0; j<m; j++) {
1930       procsnz[0] += ourlens[j];
1931     }
1932     for (i=1; i<size; i++) {
1933       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1934       /* calculate the number of nonzeros on each processor */
1935       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1936         procsnz[i] += rowlengths[j];
1937       }
1938       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1939     }
1940     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1941   } else {
1942     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1943   }
1944 
1945   if (!rank) {
1946     /* determine max buffer needed and allocate it */
1947     maxnz = 0;
1948     for (i=0; i<size; i++) {
1949       maxnz = PetscMax(maxnz,procsnz[i]);
1950     }
1951     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1952 
1953     /* read in my part of the matrix column indices  */
1954     nz   = procsnz[0];
1955     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1956     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1957 
1958     /* read in everyone else's and ship them off */
1959     for (i=1; i<size; i++) {
1960       nz   = procsnz[i];
1961       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1962       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1963     }
1964     ierr = PetscFree(cols);CHKERRQ(ierr);
1965   } else {
1966     /* determine buffer space needed for message */
1967     nz = 0;
1968     for (i=0; i<m; i++) {
1969       nz += ourlens[i];
1970     }
1971     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1972 
1973     /* receive message of column indices */
1974     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1975     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1976     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1977   }
1978 
1979   /* determine column ownership if matrix is not square */
1980   if (N != M) {
1981     n      = N/size + ((N % size) > rank);
1982     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1983     cstart = cend - n;
1984   } else {
1985     cstart = rstart;
1986     cend   = rend;
1987     n      = cend - cstart;
1988   }
1989 
1990   /* loop over local rows, determining number of off diagonal entries */
1991   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1992   jj = 0;
1993   for (i=0; i<m; i++) {
1994     for (j=0; j<ourlens[i]; j++) {
1995       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1996       jj++;
1997     }
1998   }
1999 
2000   /* create our matrix */
2001   for (i=0; i<m; i++) {
2002     ourlens[i] -= offlens[i];
2003   }
2004   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2005   ierr = MatSetType(A,type);CHKERRQ(ierr);
2006   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2007 
2008   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2009   for (i=0; i<m; i++) {
2010     ourlens[i] += offlens[i];
2011   }
2012 
2013   if (!rank) {
2014     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2015 
2016     /* read in my part of the matrix numerical values  */
2017     nz   = procsnz[0];
2018     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2019 
2020     /* insert into matrix */
2021     jj      = rstart;
2022     smycols = mycols;
2023     svals   = vals;
2024     for (i=0; i<m; i++) {
2025       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2026       smycols += ourlens[i];
2027       svals   += ourlens[i];
2028       jj++;
2029     }
2030 
2031     /* read in other processors and ship out */
2032     for (i=1; i<size; i++) {
2033       nz   = procsnz[i];
2034       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2035       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2036     }
2037     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2038   } else {
2039     /* receive numeric values */
2040     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2041 
2042     /* receive message of values */
2043     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2044     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2045     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2046 
2047     /* insert into matrix */
2048     jj      = rstart;
2049     smycols = mycols;
2050     svals   = vals;
2051     for (i=0; i<m; i++) {
2052       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2053       smycols += ourlens[i];
2054       svals   += ourlens[i];
2055       jj++;
2056     }
2057   }
2058   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2059   ierr = PetscFree(vals);CHKERRQ(ierr);
2060   ierr = PetscFree(mycols);CHKERRQ(ierr);
2061   ierr = PetscFree(rowners);CHKERRQ(ierr);
2062 
2063   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2064   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2065   *newmat = A;
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2071 /*
2072     Not great since it makes two copies of the submatrix: first a SeqAIJ
2073   submatrix on each process, and then the end result by concatenating the
2074   local matrices. Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
2075 */
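/*
   Illustrative call pattern (not part of this file): the first extraction
   builds the submatrix; later extractions with the same index sets can reuse it:

     Mat sub;
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ...
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/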
2076 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2077 {
2078   PetscErrorCode ierr;
2079   PetscMPIInt    rank,size;
2080   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2081   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2082   Mat            *local,M,Mreuse;
2083   PetscScalar    *vwork,*aa;
2084   MPI_Comm       comm = mat->comm;
2085   Mat_SeqAIJ     *aij;
2086 
2087 
2088   PetscFunctionBegin;
2089   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2090   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2091 
2092   if (call ==  MAT_REUSE_MATRIX) {
2093     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2094     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2095     local = &Mreuse;
2096     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2097   } else {
2098     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2099     Mreuse = *local;
2100     ierr   = PetscFree(local);CHKERRQ(ierr);
2101   }
2102 
2103   /*
2104       m - number of local rows
2105       n - number of columns (same on all processors)
2106       rstart - first row in new global matrix generated
2107   */
2108   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2109   if (call == MAT_INITIAL_MATRIX) {
2110     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2111     ii  = aij->i;
2112     jj  = aij->j;
2113 
2114     /*
2115         Determine the number of non-zeros in the diagonal and off-diagonal
2116         portions of the matrix in order to do correct preallocation
2117     */
2118 
2119     /* first get start and end of "diagonal" columns */
2120     if (csize == PETSC_DECIDE) {
2121       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2122       if (mglobal == n) { /* square matrix */
2123         nlocal = m;
2124       } else {
2125         nlocal = n/size + ((n % size) > rank);
2126       }
2127     } else {
2128       nlocal = csize;
2129     }
2130     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2131     rstart = rend - nlocal;
2132     if (rank == size - 1 && rend != n) {
2133       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2134     }
2135 
2136     /* next, compute all the lengths */
2137     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2138     olens = dlens + m;
2139     for (i=0; i<m; i++) {
2140       jend = ii[i+1] - ii[i];
2141       olen = 0;
2142       dlen = 0;
2143       for (j=0; j<jend; j++) {
2144         if (*jj < rstart || *jj >= rend) olen++;
2145         else dlen++;
2146         jj++;
2147       }
2148       olens[i] = olen;
2149       dlens[i] = dlen;
2150     }
2151     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2152     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2153     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2154     ierr = PetscFree(dlens);CHKERRQ(ierr);
2155   } else {
2156     PetscInt ml,nl;
2157 
2158     M = *newmat;
2159     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2160     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2161     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2162     /*
2163          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2164        rather than the slower MatSetValues().
2165     */
2166     M->was_assembled = PETSC_TRUE;
2167     M->assembled     = PETSC_FALSE;
2168   }
2169   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2170   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2171   ii  = aij->i;
2172   jj  = aij->j;
2173   aa  = aij->a;
2174   for (i=0; i<m; i++) {
2175     row   = rstart + i;
2176     nz    = ii[i+1] - ii[i];
2177     cwork = jj;     jj += nz;
2178     vwork = aa;     aa += nz;
2179     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2180   }
2181 
2182   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2183   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2184   *newmat = M;
2185 
2186   /* save submatrix used in processor for next request */
2187   if (call ==  MAT_INITIAL_MATRIX) {
2188     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2189     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2190   }
2191 
2192   PetscFunctionReturn(0);
2193 }
2194 
2195 EXTERN_C_BEGIN
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2198 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2199 {
2200   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2201   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2202   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2203   const PetscInt *JJ;
2204   PetscScalar    *values;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208 #if defined(PETSC_OPT_g)
2209   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2210 #endif
2211   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2212   o_nnz = d_nnz + m;
2213 
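  /* for each local row, count how many column indices fall inside [cstart,cend)
     (the diagonal block, d_nnz) and how many fall outside it (the off-diagonal
     block, o_nnz); this relies on the columns of each row being sorted */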
2214   for (i=0; i<m; i++) {
2215     nnz     = I[i+1]- I[i];
2216     JJ      = J + I[i];
2217     nnz_max = PetscMax(nnz_max,nnz);
2218 #if defined(PETSC_OPT_g)
2219     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2220 #endif
2221     for (j=0; j<nnz; j++) {
2222       if (*JJ >= cstart) break;
2223       JJ++;
2224     }
2225     d = 0;
2226     for (; j<nnz; j++) {
2227       if (*JJ++ >= cend) break;
2228       d++;
2229     }
2230     d_nnz[i] = d;
2231     o_nnz[i] = nnz - d;
2232   }
2233   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2234   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2235 
2236   if (v) values = (PetscScalar*)v;
2237   else {
2238     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2239     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2240   }
2241 
2242   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2243   for (i=0; i<m; i++) {
2244     ii   = i + rstart;
2245     nnz  = I[i+1]- I[i];
2246     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2247   }
2248   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2249   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2250   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2251 
2252   if (!v) {
2253     ierr = PetscFree(values);CHKERRQ(ierr);
2254   }
2255   PetscFunctionReturn(0);
2256 }
2257 EXTERN_C_END
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2261 /*@C
2262    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2263    (the default parallel PETSc format).
2264 
2265    Collective on MPI_Comm
2266 
2267    Input Parameters:
2268 +  A - the matrix
2269 .  i - the indices into j for the start of each local row (starts with zero)
2270 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2271 -  v - optional values in the matrix
2272 
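   Example (a sketch; i,j,v are this process's local CSR arrays, with row
   offsets in i starting at 0):
.vb
   ierr = MatCreate(comm,m,n,M,N,&B);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
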
2273    Level: developer
2274 
2275 .keywords: matrix, aij, compressed row, sparse, parallel
2276 
2277 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2278 @*/
2279 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2280 {
2281   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2282 
2283   PetscFunctionBegin;
2284   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2285   if (f) {
2286     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2287   }
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2293 /*@C
2294    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2295    (the default parallel PETSc format).  For good matrix assembly performance
2296    the user should preallocate the matrix storage by setting the parameters
2297    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2298    performance can be increased by more than a factor of 50.
2299 
2300    Collective on MPI_Comm
2301 
2302    Input Parameters:
2303 +  A - the matrix
2304 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2305            (same value is used for all local rows)
2306 .  d_nnz - array containing the number of nonzeros in the various rows of the
2307            DIAGONAL portion of the local submatrix (possibly different for each row)
2308            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2309            The size of this array is equal to the number of local rows, i.e. 'm'.
2310            You must leave room for the diagonal entry even if it is zero.
2311 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2312            submatrix (same value is used for all local rows).
2313 -  o_nnz - array containing the number of nonzeros in the various rows of the
2314            OFF-DIAGONAL portion of the local submatrix (possibly different for
2315            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2316            structure. The size of this array is equal to the number
2317            of local rows, i.e. 'm'.
2318 
2319    If the *_nnz parameter is given then the *_nz parameter is ignored
2320 
2321    The AIJ format (also called the Yale sparse matrix format or
2322    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2323    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2324 
2325    The parallel matrix is partitioned such that the first m0 rows belong to
2326    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2327    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
2328 
2329    The DIAGONAL portion of the local submatrix of a processor can be defined
2330    as the submatrix which is obtained by extracting the part corresponding
2331    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2332    first row that belongs to the processor, and r2 is the last row belonging
2333    to this processor. This is a square mxm matrix. The remaining portion
2334    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2335 
2336    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2337 
2338    Example usage:
2339 
2340    Consider the following 8x8 matrix with 34 non-zero values, that is
2341    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2342    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2343    as follows:
2344 
2345 .vb
2346             1  2  0  |  0  3  0  |  0  4
2347     Proc0   0  5  6  |  7  0  0  |  8  0
2348             9  0 10  | 11  0  0  | 12  0
2349     -------------------------------------
2350            13  0 14  | 15 16 17  |  0  0
2351     Proc1   0 18  0  | 19 20 21  |  0  0
2352             0  0  0  | 22 23  0  | 24  0
2353     -------------------------------------
2354     Proc2  25 26 27  |  0  0 28  | 29  0
2355            30  0  0  | 31 32 33  |  0 34
2356 .ve
2357 
2358    This can be represented as a collection of submatrices as:
2359 
2360 .vb
2361       A B C
2362       D E F
2363       G H I
2364 .ve
2365 
2366    Where the submatrices A,B,C are owned by proc0, D,E,F are
2367    owned by proc1, G,H,I are owned by proc2.
2368 
2369    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2370    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2371    The 'M','N' parameters are 8,8, and have the same values on all procs.
2372 
2373    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2374    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2375    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2376    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2377    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2378    matrix, and [DF] as another SeqAIJ matrix.
2379 
2380    When d_nz, o_nz parameters are specified, d_nz storage elements are
2381    allocated for every row of the local diagonal submatrix, and o_nz
2382    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2383    One way to choose d_nz and o_nz is to use the max nonzeros per local
2384    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2385    In this case, the values of d_nz,o_nz are:
2386 .vb
2387      proc0 : dnz = 2, o_nz = 2
2388      proc1 : dnz = 3, o_nz = 2
2389      proc2 : dnz = 1, o_nz = 4
2390 .ve
2391    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2392    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2393    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2394    34 values.
2395 
2396    When d_nnz, o_nnz parameters are specified, the storage is specified
2397    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2398    In the above case the values for d_nnz,o_nnz are:
2399 .vb
2400      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2401      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2402      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2403 .ve
2404    Here the space allocated is the sum of all the above values, i.e. 34, and
2405    hence the preallocation is perfect.
2406 
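   With the numbers above, proc1 could preallocate exactly with (a sketch):
.vb
   PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
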
2407    Level: intermediate
2408 
2409 .keywords: matrix, aij, compressed row, sparse, parallel
2410 
2411 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2412           MPIAIJ
2413 @*/
2414 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2415 {
2416   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2417 
2418   PetscFunctionBegin;
2419   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2420   if (f) {
2421     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2422   }
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatCreateMPIAIJ"
2428 /*@C
2429    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2430    (the default parallel PETSc format).  For good matrix assembly performance
2431    the user should preallocate the matrix storage by setting the parameters
2432    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2433    performance can be increased by more than a factor of 50.
2434 
2435    Collective on MPI_Comm
2436 
2437    Input Parameters:
2438 +  comm - MPI communicator
2439 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2440            This value should be the same as the local size used in creating the
2441            y vector for the matrix-vector product y = Ax.
2442 .  n - This value should be the same as the local size used in creating the
2443        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2444        calculated if N is given). For square matrices n is almost always m.
2445 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2446 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2447 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2448            (same value is used for all local rows)
2449 .  d_nnz - array containing the number of nonzeros in the various rows of the
2450            DIAGONAL portion of the local submatrix (possibly different for each row)
2451            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2452            The size of this array is equal to the number of local rows, i.e. 'm'.
2453            You must leave room for the diagonal entry even if it is zero.
2454 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2455            submatrix (same value is used for all local rows).
2456 -  o_nnz - array containing the number of nonzeros in the various rows of the
2457            OFF-DIAGONAL portion of the local submatrix (possibly different for
2458            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2459            structure. The size of this array is equal to the number
2460            of local rows, i.e. 'm'.
2461 
2462    Output Parameter:
2463 .  A - the matrix
2464 
2465    Notes:
2466    If the *_nnz parameter is given then the *_nz parameter is ignored
2467 
2468    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2469    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2470    storage requirements for this matrix.
2471 
2472    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2473    processor then it must be used on all processors that share the object for
2474    that argument.
2475 
2476    The user MUST specify either the local or global matrix dimensions
2477    (possibly both).
2478 
2479    The parallel matrix is partitioned such that the first m0 rows belong to
2480    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2481    to process 2 etc., where m0,m1,m2... are the input parameter 'm'.
2482 
2483    The DIAGONAL portion of the local submatrix of a processor can be defined
2484    as the submatrix which is obtained by extracting the part corresponding
2485    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2486    first row that belongs to the processor, and r2 is the last row belonging
2487    to this processor. This is a square mxm matrix. The remaining portion
2488    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2489 
2490    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2491 
2492    When calling this routine with a single process communicator, a matrix of
2493    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2494    type of communicator, use the construction mechanism:
2495      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2496 
2497    By default, this format uses inodes (identical nodes) when possible.
2498    We search for consecutive rows with the same nonzero structure, thereby
2499    reusing matrix information to achieve increased efficiency.
2500 
2501    Options Database Keys:
2502 +  -mat_no_inode  - Do not use inodes
2503 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2504 -  -mat_aij_oneindex - Internally use indexing starting at 1
2505         rather than 0.  Note that when calling MatSetValues(),
2506         the user still MUST index entries starting at 0!
2507 
2508 
2509    Example usage:
2510 
2511    Consider the following 8x8 matrix with 34 non-zero values, that is
2512    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2513    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2514    as follows:
2515 
2516 .vb
2517             1  2  0  |  0  3  0  |  0  4
2518     Proc0   0  5  6  |  7  0  0  |  8  0
2519             9  0 10  | 11  0  0  | 12  0
2520     -------------------------------------
2521            13  0 14  | 15 16 17  |  0  0
2522     Proc1   0 18  0  | 19 20 21  |  0  0
2523             0  0  0  | 22 23  0  | 24  0
2524     -------------------------------------
2525     Proc2  25 26 27  |  0  0 28  | 29  0
2526            30  0  0  | 31 32 33  |  0 34
2527 .ve
2528 
2529    This can be represented as a collection of submatrices as:
2530 
2531 .vb
2532       A B C
2533       D E F
2534       G H I
2535 .ve
2536 
2537    Where the submatrices A,B,C are owned by proc0, D,E,F are
2538    owned by proc1, G,H,I are owned by proc2.
2539 
2540    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2541    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2542    The 'M','N' parameters are 8,8, and have the same values on all procs.
2543 
2544    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2545    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2546    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2547    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2548    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2549    matrix, and [DF] as another SeqAIJ matrix.
2550 
2551    When d_nz, o_nz parameters are specified, d_nz storage elements are
2552    allocated for every row of the local diagonal submatrix, and o_nz
2553    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2554    One way to choose d_nz and o_nz is to use the max nonzeros per local
2555    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2556    In this case, the values of d_nz,o_nz are:
2557 .vb
2558      proc0 : dnz = 2, o_nz = 2
2559      proc1 : dnz = 3, o_nz = 2
2560      proc2 : dnz = 1, o_nz = 4
2561 .ve
2562    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2563    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2564    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2565    34 values.
2566 
2567    When d_nnz, o_nnz parameters are specified, the storage is specified
2568    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2569    In the above case the values for d_nnz,o_nnz are:
2570 .vb
2571      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2572      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2573      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2574 .ve
2575    Here the space allocated is the sum of all the above values, i.e. 34, and
2576    hence the preallocation is perfect.
2577 
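   With the numbers above, proc1 could create its share of the matrix with (a sketch):
.vb
   PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
   ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
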
2578    Level: intermediate
2579 
2580 .keywords: matrix, aij, compressed row, sparse, parallel
2581 
2582 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2583           MPIAIJ
2584 @*/
2585 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2586 {
2587   PetscErrorCode ierr;
2588   PetscMPIInt    size;
2589 
2590   PetscFunctionBegin;
2591   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2592   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2593   if (size > 1) {
2594     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2595     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2596   } else {
2597     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2598     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2599   }
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 #undef __FUNCT__
2604 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2605 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2606 {
2607   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2608 
2609   PetscFunctionBegin;
2610   *Ad     = a->A;
2611   *Ao     = a->B;
2612   *colmap = a->garray;
2613   PetscFunctionReturn(0);
2614 }
2615 
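/*
   Illustrative usage (not part of this file): retrieve the diagonal and
   off-diagonal SeqAIJ blocks, plus the mapping from the compressed column
   numbers of the off-diagonal block to global column numbers:

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/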
2616 #undef __FUNCT__
2617 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2618 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2619 {
2620   PetscErrorCode ierr;
2621   PetscInt       i;
2622   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2623 
2624   PetscFunctionBegin;
2625   if (coloring->ctype == IS_COLORING_LOCAL) {
2626     ISColoringValue *allcolors,*colors;
2627     ISColoring      ocoloring;
2628 
2629     /* set coloring for diagonal portion */
2630     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2631 
2632     /* set coloring for off-diagonal portion */
2633     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2634     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2635     for (i=0; i<a->B->n; i++) {
2636       colors[i] = allcolors[a->garray[i]];
2637     }
2638     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2639     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2640     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2641     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2642   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2643     ISColoringValue *colors;
2644     PetscInt        *larray;
2645     ISColoring      ocoloring;
2646 
2647     /* set coloring for diagonal portion */
2648     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2649     for (i=0; i<a->A->n; i++) {
2650       larray[i] = i + a->cstart;
2651     }
2652     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2653     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2654     for (i=0; i<a->A->n; i++) {
2655       colors[i] = coloring->colors[larray[i]];
2656     }
2657     ierr = PetscFree(larray);CHKERRQ(ierr);
2658     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2659     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2660     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2661 
2662     /* set coloring for off-diagonal portion */
2663     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2664     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2665     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2666     for (i=0; i<a->B->n; i++) {
2667       colors[i] = coloring->colors[larray[i]];
2668     }
2669     ierr = PetscFree(larray);CHKERRQ(ierr);
2670     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2671     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2672     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2673   } else {
2674     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2675   }
2676 
2677   PetscFunctionReturn(0);
2678 }
2679 
2680 #if defined(PETSC_HAVE_ADIC)
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2683 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2684 {
2685   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2686   PetscErrorCode ierr;
2687 
2688   PetscFunctionBegin;
2689   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2690   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2691   PetscFunctionReturn(0);
2692 }
2693 #endif
2694 
2695 #undef __FUNCT__
2696 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2697 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2698 {
2699   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2700   PetscErrorCode ierr;
2701 
2702   PetscFunctionBegin;
2703   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2704   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2705   PetscFunctionReturn(0);
2706 }
2707 
2708 #undef __FUNCT__
2709 #define __FUNCT__ "MatMerge"
2710 /*@C
2711       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2712                  matrices from each processor
2713 
2714     Collective on MPI_Comm
2715 
2716    Input Parameters:
2717 +    comm - the communicator the parallel matrix will live on
2718 .    inmat - the input sequential matrix (one per process)
2719 .    n - number of local columns (or PETSC_DECIDE)
2720 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2721 
2722    Output Parameter:
2723 .    outmat - the parallel matrix generated
2724 
2725     Level: advanced
2726 
2727    Notes: The number of columns of the matrix on EACH processor MUST be the same.
2728 
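   Example (a sketch; assumes each process holds its rows of the global matrix
   in a SeqAIJ matrix inmat):
.vb
   Mat outmat;
   ierr = MatMerge(comm,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve
   Note that inmat is destroyed as part of building outmat.
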
2729 @*/
2730 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2731 {
2732   PetscErrorCode ierr;
2733   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2734   PetscInt       *indx;
2735   PetscScalar    *values;
2736   PetscMap       columnmap,rowmap;
2737 
2738   PetscFunctionBegin;
2739   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2740   /*
2741   PetscMPIInt       rank;
2742   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2743   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2744   */
2745   if (scall == MAT_INITIAL_MATRIX){
2746     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2747     if (n == PETSC_DECIDE){
2748       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2749       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2750       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2751       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2752       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2753     }
2754 
2755     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2756     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2757     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2758     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2759     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2760 
2761     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2762     for (i=0;i<m;i++) {
2763       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2764       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2765       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2766     }
2767     /* This routine ONLY returns an MPIAIJ type matrix */
2768     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2769     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2770     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2771     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2772 
2773   } else if (scall == MAT_REUSE_MATRIX){
2774     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2775   } else {
2776     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2777   }
2778 
2779   for (i=0;i<m;i++) {
2780     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2781     I    = i + rstart;
2782     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2783     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2784   }
2785   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2786   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2787   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2788 
2789   PetscFunctionReturn(0);
2790 }
2791 
2792 #undef __FUNCT__
2793 #define __FUNCT__ "MatFileSplit"
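/*
   MatFileSplit - developer utility: writes the local rows of A, as a SeqAIJ
   matrix, to the binary file <outfile>.<rank> on each process.
*/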
2794 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2795 {
2796   PetscErrorCode    ierr;
2797   PetscMPIInt       rank;
2798   PetscInt          m,N,i,rstart,nnz;
2799   size_t            len;
2800   const PetscInt    *indx;
2801   PetscViewer       out;
2802   char              *name;
2803   Mat               B;
2804   const PetscScalar *values;
2805 
2806   PetscFunctionBegin;
2807   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2808   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2809   /* Should this be the type of the diagonal block of A? */
2810   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2811   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2812   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2813   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2814   for (i=0;i<m;i++) {
2815     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2816     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2817     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2818   }
2819   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2820   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2821 
2822   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2823   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2824   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2825   sprintf(name,"%s.%d",outfile,rank);
2826   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2827   ierr = PetscFree(name);CHKERRQ(ierr);
2828   ierr = MatView(B,out);CHKERRQ(ierr);
2829   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2830   ierr = MatDestroy(B);CHKERRQ(ierr);
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2835 #undef __FUNCT__
2836 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2837 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2838 {
2839   PetscErrorCode       ierr;
2840   Mat_Merge_SeqsToMPI  *merge;
2841   PetscObjectContainer container;
2842 
2843   PetscFunctionBegin;
2844   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2845   if (container) {
2846     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2847     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2848     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2849     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2850     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2851     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2852     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2853     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2854     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2855     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2856     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2857     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2858 
2859     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2860     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2861     ierr = PetscFree(merge);CHKERRQ(ierr);  /* merge is only valid when the container existed */
2862   }
2863 
2864   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 #include "src/mat/utils/freespace.h"
2869 #include "petscbt.h"
2870 static PetscEvent logkey_seqstompinum = 0;
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2873 /*@C
2874       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2875                  matrices from each processor
2876 
2877     Collective on MPI_Comm
2878 
2879    Input Parameters:
2880 +    comm - the communicator the parallel matrix will live on
2881 .    seqmat - the input sequential matrix (one per processor)
2882 .    m - number of local rows (or PETSC_DECIDE)
2883 .    n - number of local columns (or PETSC_DECIDE)
2884 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2885 
2886    Output Parameter:
2887 .    mpimat - the parallel matrix generated
2888 
2889     Level: advanced
2890 
2891    Notes:
2892      The dimensions of the sequential matrix on each processor MUST be the same.
2893      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and is
2894      destroyed when mpimat is destroyed. Call PetscObjectQuery() to gain access to seqmat.
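   Example usage (a hedged sketch; seqmat is assumed to be an assembled SeqAIJ
   matrix with the same dimensions on every process):
.vb
     Mat seq,mpi;
     ierr = MatMerge_SeqsToMPI(comm,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpi);CHKERRQ(ierr);
     /* ... change the numerical values of seq, keeping its nonzero pattern ... */
     ierr = MatMerge_SeqsToMPI(comm,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpi);CHKERRQ(ierr);
.ve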
2895 @*/
2896 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2897 {
2898   PetscErrorCode       ierr;
2899   MPI_Comm             comm=mpimat->comm;
2900   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2901   PetscMPIInt          size,rank,taga,*len_s;
2902   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2903   PetscInt             proc,m;
2904   PetscInt             **buf_ri,**buf_rj;
2905   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2906   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2907   MPI_Request          *s_waits,*r_waits;
2908   MPI_Status           *status;
2909   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2910   Mat_Merge_SeqsToMPI  *merge;
2911   PetscObjectContainer container;
2912 
2913   PetscFunctionBegin;
2914   if (!logkey_seqstompinum) {
2915     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2916   }
2917   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2918 
2919   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2920   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2921 
2922   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2923   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have an attached MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
2924   ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2926   bi     = merge->bi;
2927   bj     = merge->bj;
2928   buf_ri = merge->buf_ri;
2929   buf_rj = merge->buf_rj;
2930 
2931   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2932   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2933   len_s  = merge->len_s;
2934 
2935   /* send and recv matrix values */
2936   /*-----------------------------*/
2937   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2938   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2939 
2940   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2941   for (proc=0,k=0; proc<size; proc++){
2942     if (!len_s[proc]) continue;
2943     i = owners[proc];
2944     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2945     k++;
2946   }
2947 
2948   ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);
2949   ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);
2950   ierr = PetscFree(status);CHKERRQ(ierr);
2951 
2952   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2953   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2954 
2955   /* insert mat values of mpimat */
2956   /*----------------------------*/
2957   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2958   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2959   nextrow = buf_ri_k + merge->nrecv;
2960   nextai  = nextrow + merge->nrecv;
2961 
2962   for (k=0; k<merge->nrecv; k++){
2963     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
2964     nrows = *(buf_ri_k[k]);
2965     nextrow[k]  = buf_ri_k[k]+1;  /* next row number within the k-th received i-structure */
2966     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure entry of the k-th received message */
2967   }
2968 
2969   /* set values of ba */
2970   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2971   for (i=0; i<m; i++) {
2972     arow = owners[rank] + i;
2973     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2974     bnzi = bi[i+1] - bi[i];
2975     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2976 
2977     /* add local non-zero vals of this proc's seqmat into ba */
2978     anzi = ai[arow+1] - ai[arow];
2979     aj   = a->j + ai[arow];
2980     aa   = a->a + ai[arow];
2981     nextaj = 0;
2982     for (j=0; nextaj<anzi; j++){
2983       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2984         ba_i[j] += aa[nextaj++];
2985       }
2986     }
2987 
2988     /* add received vals into ba */
2989     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2990       /* i-th row */
2991       if (i == *nextrow[k]) {
2992         anzi = *(nextai[k]+1) - *nextai[k];
2993         aj   = buf_rj[k] + *(nextai[k]);
2994         aa   = abuf_r[k] + *(nextai[k]);
2995         nextaj = 0;
2996         for (j=0; nextaj<anzi; j++){
2997           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2998             ba_i[j] += aa[nextaj++];
2999           }
3000         }
3001         nextrow[k]++; nextai[k]++;
3002       }
3003     }
3004     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
3005   }
3006   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3007   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3008 
3009   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
3010   ierr = PetscFree(ba_i);CHKERRQ(ierr);
3011   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3012   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
3013   PetscFunctionReturn(0);
3014 }
3015 
3016 static PetscEvent logkey_seqstompisym = 0;
3017 #undef __FUNCT__
3018 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
3019 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
3020 {
3021   PetscErrorCode       ierr;
3022   Mat                  B_mpi;
3023   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3024   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3025   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3026   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3027   PetscInt             len,proc,*dnz,*onz;
3028   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3029   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3030   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3031   MPI_Status           *status;
3032   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3033   PetscBT              lnkbt;
3034   Mat_Merge_SeqsToMPI  *merge;
3035   PetscObjectContainer container;
3036 
3037   PetscFunctionBegin;
3038   if (!logkey_seqstompisym) {
3039     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3040   }
3041   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3042 
3043   /* make sure it is a PETSc comm */
3044   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3046   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3047 
3048   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3049   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3050 
3051   /* determine row ownership */
3052   /*---------------------------------------------------------*/
3053   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3054   if (m == PETSC_DECIDE) {
3055     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3056   } else {
3057     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3058   }
3059   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3060   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3061   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3062 
3063   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3064   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3065 
3066   /* determine the number of messages to send, their lengths */
3067   /*---------------------------------------------------------*/
3068   len_s  = merge->len_s;
3069 
3070   len = 0;  /* length of buf_si[] */
3071   merge->nsend = 0;
3072   for (proc=0; proc<size; proc++){
3073     len_si[proc] = 0;
3074     if (proc == rank){
3075       len_s[proc] = 0;
3076     } else {
3077       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3078       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3079     }
3080     if (len_s[proc]) {
3081       merge->nsend++;
3082       nrows = 0;
3083       for (i=owners[proc]; i<owners[proc+1]; i++){
3084         if (ai[i+1] > ai[i]) nrows++;
3085       }
3086       len_si[proc] = 2*(nrows+1);
3087       len += len_si[proc];
3088     }
3089   }
3090 
3091   /* determine the number and length of messages to receive for ij-structure */
3092   /*-------------------------------------------------------------------------*/
3093   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3094   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3095 
3096   /* post the Irecv of j-structure */
3097   /*-------------------------------*/
3098   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3099   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3100 
3101   /* post the Isend of j-structure */
3102   /*--------------------------------*/
3103   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3104   sj_waits = si_waits + merge->nsend;
3105 
3106   for (proc=0, k=0; proc<size; proc++){
3107     if (!len_s[proc]) continue;
3108     i = owners[proc];
3109     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3110     k++;
3111   }
3112 
3113   /* receives and sends of j-structure are complete */
3114   /*------------------------------------------------*/
3115   ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);
3116   ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);
3117 
3118   /* send and recv i-structure */
3119   /*---------------------------*/
3120   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3121   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3122 
3123   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3124   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3125   for (proc=0,k=0; proc<size; proc++){
3126     if (!len_s[proc]) continue;
3127     /* form outgoing message for i-structure:
3128          buf_si[0]:                 nrows to be sent
3129                [1:nrows]:           row index (global)
3130                [nrows+1:2*nrows+1]: i-structure index
3131     */
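    /* e.g. (illustrative): sending two rows with local indices 2 and 5 holding
       3 and 4 nonzeros gives buf_si = [2, 2,5, 0,3,7] -- nrows, then the local
       row indices, then the CSR-style offsets into the accompanying j-message */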
3132     /*-------------------------------------------*/
3133     nrows = len_si[proc]/2 - 1;
3134     buf_si_i    = buf_si + nrows+1;
3135     buf_si[0]   = nrows;
3136     buf_si_i[0] = 0;
3137     nrows = 0;
3138     for (i=owners[proc]; i<owners[proc+1]; i++){
3139       anzi = ai[i+1] - ai[i];
3140       if (anzi) {
3141         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3142         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3143         nrows++;
3144       }
3145     }
3146     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3147     k++;
3148     buf_si += len_si[proc];
3149   }
3150 
3151   ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);
3152   ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);
3153 
3154   ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
3155   for (i=0; i<merge->nrecv; i++){
3156     ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
3157   }
3158 
3159   ierr = PetscFree(len_si);CHKERRQ(ierr);
3160   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3161   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3162   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3163   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3164   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3165   ierr = PetscFree(status);CHKERRQ(ierr);
3166 
3167   /* compute a local seq matrix in each processor */
3168   /*----------------------------------------------*/
3169   /* allocate bi array and free space for accumulating nonzero column info */
3170   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3171   bi[0] = 0;
3172 
3173   /* create and initialize a linked list */
3174   nlnk = N+1;
3175   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3176 
3177   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3178   len  = ai[owners[rank+1]] - ai[owners[rank]];
3180   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3181   current_space = free_space;
3182 
3183   /* determine symbolic info for each local row */
3184   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3185   nextrow = buf_ri_k + merge->nrecv;
3186   nextai  = nextrow + merge->nrecv;
3187   for (k=0; k<merge->nrecv; k++){
3188     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3189     nrows = *buf_ri_k[k];
3190     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number within the k-th received i-structure */
3191     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure entry of the k-th received message */
3192   }
3193 
3194   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3195   len = 0;
3196   for (i=0;i<m;i++) {
3197     bnzi   = 0;
3198     /* add local non-zero cols of this proc's seqmat into lnk */
3199     arow   = owners[rank] + i;
3200     anzi   = ai[arow+1] - ai[arow];
3201     aj     = a->j + ai[arow];
3202     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3203     bnzi += nlnk;
3204     /* add received col data into lnk */
3205     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3206       if (i == *nextrow[k]) { /* i-th row */
3207         anzi = *(nextai[k]+1) - *nextai[k];
3208         aj   = buf_rj[k] + *nextai[k];
3209         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3210         bnzi += nlnk;
3211         nextrow[k]++; nextai[k]++;
3212       }
3213     }
3214     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3215 
3216     /* if free space is not available, make more free space */
3217     if (current_space->local_remaining<bnzi) {
3218       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3219       nspacedouble++;
3220     }
3221     /* copy data into free space, then initialize lnk */
3222     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3223     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3224 
3225     current_space->array           += bnzi;
3226     current_space->local_used      += bnzi;
3227     current_space->local_remaining -= bnzi;
3228 
3229     bi[i+1] = bi[i] + bnzi;
3230   }
3231 
3232   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3233 
3234   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3235   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3236   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3237 
3238   /* create symbolic parallel matrix B_mpi */
3239   /*---------------------------------------*/
3240   if (n==PETSC_DECIDE) {
3241     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3242   } else {
3243     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3244   }
3245   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3246   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3247   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3248 
3249   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3250   B_mpi->assembled     = PETSC_FALSE;
3251   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3252   merge->bi            = bi;
3253   merge->bj            = bj;
3254   merge->buf_ri        = buf_ri;
3255   merge->buf_rj        = buf_rj;
3256   merge->coi           = PETSC_NULL;
3257   merge->coj           = PETSC_NULL;
3258   merge->owners_co     = PETSC_NULL;
3259 
3260   /* attach the supporting struct to B_mpi for reuse */
3261   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3262   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3263   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3264   *mpimat = B_mpi;
3265 
3266   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3267   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3268   PetscFunctionReturn(0);
3269 }
3270 
3271 static PetscEvent logkey_seqstompi = 0;
3272 #undef __FUNCT__
3273 #define __FUNCT__ "MatMerge_SeqsToMPI"
3274 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3275 {
3276   PetscErrorCode   ierr;
3277 
3278   PetscFunctionBegin;
3279   if (!logkey_seqstompi) {
3280     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3281   }
3282   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3283   if (scall == MAT_INITIAL_MATRIX){
3284     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3285   }
3286   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3287   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3288   PetscFunctionReturn(0);
3289 }
3290 static PetscEvent logkey_getlocalmat = 0;
3291 #undef __FUNCT__
3292 #define __FUNCT__ "MatGetLocalMat"
3293 /*@C
3294      MatGetLocalMat - Creates a SeqAIJ matrix by taking all the local rows of A
3295 
3296     Not Collective
3297 
3298    Input Parameters:
3299 +    A - the matrix
3300 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3301 
3302    Output Parameter:
3303 .    A_loc - the local sequential matrix generated
3304 
3305     Level: developer
3306 
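   Example usage (a sketch; A is assumed to be an assembled MPIAIJ matrix):
.vb
     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... after A's numerical values change (same nonzero pattern) ... */
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
.ve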
3307 @*/
3308 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3309 {
3310   PetscErrorCode  ierr;
3311   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3312   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3313   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3314   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3315   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3316   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3317 
3318   PetscFunctionBegin;
3319   if (!logkey_getlocalmat) {
3320     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3321   }
3322   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3323   if (scall == MAT_INITIAL_MATRIX){
3324     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3325     ci[0] = 0;
3326     for (i=0; i<am; i++){
3327       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3328     }
3329     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3330     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3331     k = 0;
3332     for (i=0; i<am; i++) {
3333       ncols_o = bi[i+1] - bi[i];
3334       ncols_d = ai[i+1] - ai[i];
3335       /* off-diagonal portion of A */
3336       for (jo=0; jo<ncols_o; jo++) {
3337         col = cmap[*bj];
3338         if (col >= cstart) break;
3339         cj[k]   = col; bj++;
3340         ca[k++] = *ba++;
3341       }
3342       /* diagonal portion of A */
3343       for (j=0; j<ncols_d; j++) {
3344         cj[k]   = cstart + *aj++;
3345         ca[k++] = *aa++;
3346       }
3347       /* off-diagonal portion of A */
3348       for (j=jo; j<ncols_o; j++) {
3349         cj[k]   = cmap[*bj++];
3350         ca[k++] = *ba++;
3351       }
3352     }
3353     /* put together the new matrix */
3354     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3355     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3356     /* Since these are PETSc arrays, change flags to free them as necessary. */
3357     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3358     mat->freedata = PETSC_TRUE;
3359     mat->nonew    = 0;
3360   } else if (scall == MAT_REUSE_MATRIX){
3361     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3362     ci = mat->i; cj = mat->j; ca = mat->a;
3363     for (i=0; i<am; i++) {
3364       /* off-diagonal portion of A */
3365       ncols_o = bi[i+1] - bi[i];
3366       for (jo=0; jo<ncols_o; jo++) {
3367         col = cmap[*bj];
3368         if (col >= cstart) break;
3369         *ca++ = *ba++; bj++;
3370       }
3371       /* diagonal portion of A */
3372       ncols_d = ai[i+1] - ai[i];
3373       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3374       /* off-diagonal portion of A */
3375       for (j=jo; j<ncols_o; j++) {
3376         *ca++ = *ba++; bj++;
3377       }
3378     }
3379   } else {
3380     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3381   }
3382 
3383   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3384   PetscFunctionReturn(0);
3385 }
3386 
3387 static PetscEvent logkey_getlocalmatcondensed = 0;
3388 #undef __FUNCT__
3389 #define __FUNCT__ "MatGetLocalMatCondensed"
3390 /*@C
3391      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all the local rows and the NON-ZERO columns of A
3392 
3393     Not Collective
3394 
3395    Input Parameters:
3396 +    A - the matrix
3397 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3398 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3399 
3400    Output Parameter:
3401 .    A_loc - the local sequential matrix generated
3402 
3403     Level: developer
3404 
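   Example usage (a sketch; passing PETSC_NULL for row and col lets the routine
   build the index sets itself):
.vb
     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
.ve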
3405 @*/
3406 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3407 {
3408   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3409   PetscErrorCode    ierr;
3410   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3411   IS                isrowa,iscola;
3412   Mat               *aloc;
3413 
3414   PetscFunctionBegin;
3415   if (!logkey_getlocalmatcondensed) {
3416     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3417   }
3418   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3419   if (!row){
3420     start = a->rstart; end = a->rend;
3421     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3422   } else {
3423     isrowa = *row;
3424   }
3425   if (!col){
3426     start = a->cstart;
3427     cmap  = a->garray;
3428     nzA   = a->A->n;
3429     nzB   = a->B->n;
3430     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3431     ncols = 0;
3432     for (i=0; i<nzB; i++) {
3433       if (cmap[i] < start) idx[ncols++] = cmap[i];
3434       else break;
3435     }
3436     imark = i;
3437     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3438     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3439     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3440     ierr = PetscFree(idx);CHKERRQ(ierr);
3441   } else {
3442     iscola = *col;
3443   }
3444   if (scall != MAT_INITIAL_MATRIX){
3445     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3446     aloc[0] = *A_loc;
3447   }
3448   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3449   *A_loc = aloc[0];
3450   ierr = PetscFree(aloc);CHKERRQ(ierr);
3451   if (!row){
3452     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3453   }
3454   if (!col){
3455     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3456   }
3457   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3458   PetscFunctionReturn(0);
3459 }
3460 
3461 static PetscEvent logkey_GetBrowsOfAcols = 0;
3462 #undef __FUNCT__
3463 #define __FUNCT__ "MatGetBrowsOfAcols"
3464 /*@C
3465     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
3466 
3467     Collective on Mat
3468 
3469    Input Parameters:
3470 +    A,B - the matrices in mpiaij format
3471 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3472 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3473 
3474    Output Parameter:
3475 +    rowb, colb - index sets of rows and columns of B to extract
3476 .    brstart - row index in B_seq at which B's own local rows begin
3477 -    B_seq - the sequential matrix generated
3478 
3479     Level: developer
3480 
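   Example usage (a sketch; the rowb/colb index sets returned by the first call
   must be kept and passed back for the MAT_REUSE_MATRIX call):
.vb
     Mat      B_seq;
     IS       rowb,colb;
     PetscInt brstart;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     /* ... after B's numerical values change (same nonzero pattern) ... */
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
.ve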
3481 @*/
3482 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3483 {
3484   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3485   PetscErrorCode    ierr;
3486   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3487   IS                isrowb,iscolb;
3488   Mat               *bseq;
3489 
3490   PetscFunctionBegin;
3491   if (a->cstart != b->rstart || a->cend != b->rend){
3492     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3493   }
3494   if (!logkey_GetBrowsOfAcols) {
3495     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3496   }
3497   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3498 
3499   if (scall == MAT_INITIAL_MATRIX){
3500     start = a->cstart;
3501     cmap  = a->garray;
3502     nzA   = a->A->n;
3503     nzB   = a->B->n;
3504     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3505     ncols = 0;
3506     for (i=0; i<nzB; i++) {  /* row < local row index */
3507       if (cmap[i] < start) idx[ncols++] = cmap[i];
3508       else break;
3509     }
3510     imark = i;
3511     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3512     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3513     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3514     ierr = PetscFree(idx);CHKERRQ(ierr);
3515     *brstart = imark;
3516     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3517   } else {
3518     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3519     isrowb = *rowb; iscolb = *colb;
3520     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3521     bseq[0] = *B_seq;
3522   }
3523   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3524   *B_seq = bseq[0];
3525   ierr = PetscFree(bseq);CHKERRQ(ierr);
3526   if (!rowb){
3527     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3528   } else {
3529     *rowb = isrowb;
3530   }
3531   if (!colb){
3532     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3533   } else {
3534     *colb = iscolb;
3535   }
3536   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3537   PetscFunctionReturn(0);
3538 }
3539 
3540 static PetscEvent logkey_GetBrowsOfAocols = 0;
3541 #undef __FUNCT__
3542 #define __FUNCT__ "MatGetBrowsOfAoCols"
3543 /*@C
3544     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to
3545     the nonzero columns of the OFF-DIAGONAL portion of the local part of A
3546 
3547     Collective on Mat
3548 
3549    Input Parameters:
3550 +    A,B - the matrices in mpiaij format
3551 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3552 .    startsj - starting points in B's sending and receiving j-arrays, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
3553 -    bufa_ptr - buffer for sending matrix values, saved for MAT_REUSE_MATRIX (or PETSC_NULL)
3554 
3555    Output Parameter:
3556 .    B_oth - the sequential matrix generated
3557 
3558     Level: developer
3559 
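   Example usage (a sketch; startsj and bufa returned by the first call are
   passed back so the MAT_REUSE_MATRIX call can skip the symbolic communication):
.vb
     Mat         B_oth;
     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     /* ... after B's numerical values change (same nonzero pattern) ... */
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
.ve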
3560 @*/
3561 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3562 {
3563   VecScatter_MPI_General *gen_to,*gen_from;
3564   PetscErrorCode         ierr;
3565   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3566   Mat_SeqAIJ             *b_oth;
3567   VecScatter             ctx=a->Mvctx;
3568   MPI_Comm               comm=ctx->comm;
3569   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3570   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3571   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3572   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3573   MPI_Request            *rwaits,*swaits;
3574   MPI_Status             *sstatus,rstatus;
3575   PetscInt               *cols;
3576   PetscScalar            *vals;
3577   PetscMPIInt            j;
3578 
3579   PetscFunctionBegin;
3580   if (a->cstart != b->rstart || a->cend != b->rend){
3581     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3582   }
3583   if (!logkey_GetBrowsOfAocols) {
3584     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3585   }
3586   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3587   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3588 
3589   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3590   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3591   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3592   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3593   nrecvs   = gen_from->n;
3594   nsends   = gen_to->n;
3595   rwaits   = gen_from->requests;
3596   swaits   = gen_to->requests;
3597   srow     = gen_to->indices;   /* local row index to be sent */
3598   rstarts  = gen_from->starts;
3599   sstarts  = gen_to->starts;
3600   rprocs   = gen_from->procs;
3601   sprocs   = gen_to->procs;
3602   sstatus  = gen_to->sstatus;
3603 
3604   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3605   if (scall == MAT_INITIAL_MATRIX){
3606     /* i-array */
3607     /*---------*/
3608     /*  post receives */
3609     for (i=0; i<nrecvs; i++){
3610       rowlen = (PetscInt*)rvalues + rstarts[i];
3611       nrows = rstarts[i+1]-rstarts[i];
3612       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3613     }
3614 
3615     /* pack the outgoing message */
3616     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3617     rstartsj = sstartsj + nsends +1;
3618     sstartsj[0] = 0;  rstartsj[0] = 0;
3619     len = 0; /* total length of j or a array to be sent */
3620     k = 0;
3621     for (i=0; i<nsends; i++){
3622       rowlen = (PetscInt*)svalues + sstarts[i];
3623       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3624       for (j=0; j<nrows; j++) {
3625         row = srow[k] + b->rowners[rank]; /* global row idx */
3626         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3627         len += rowlen[j];
3628         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3629         k++;
3630       }
3631       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3632       sstartsj[i+1] = len;  /* starting point of the (i+1)-th outgoing msg in bufj and bufa */
3633     }
3634     /* recvs and sends of i-array are completed */
3635     i = nrecvs;
3636     while (i--) {
3637       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3638     }
3639     if (nsends) {
3640       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3641     }
3642     /* allocate buffers for sending j and a arrays */
3643     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3644     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3645 
3646     /* create i-array of B_oth */
3647     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3648     b_othi[0] = 0;
3649     len = 0; /* total length of j or a array to be received */
3650     k = 0;
3651     for (i=0; i<nrecvs; i++){
3652       rowlen = (PetscInt*)rvalues + rstarts[i];
3653       nrows = rstarts[i+1]-rstarts[i];
3654       for (j=0; j<nrows; j++) {
3655         b_othi[k+1] = b_othi[k] + rowlen[j];
3656         len += rowlen[j]; k++;
3657       }
3658       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3659     }
3660 
3661     /* allocate space for the j and a arrays of B_oth */
3662     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3663     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3664 
3665     /* j-array */
3666     /*---------*/
3667     /*  post receives of j-array */
3668     for (i=0; i<nrecvs; i++){
3669       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3670       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3671     }
3672     k = 0;
3673     for (i=0; i<nsends; i++){
3674       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3675       bufJ = bufj+sstartsj[i];
3676       for (j=0; j<nrows; j++) {
3677         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3678         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3679         for (l=0; l<ncols; l++){
3680           *bufJ++ = cols[l];
3681         }
3682         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3683       }
3684       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3685     }
3686 
3687     /* recvs and sends of j-array are completed */
3688     i = nrecvs;
3689     while (i--) {
3690       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3691     }
3692     if (nsends) {
3693       ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3694     }
3695   } else if (scall == MAT_REUSE_MATRIX){
3696     sstartsj = *startsj;
3697     rstartsj = sstartsj + nsends +1;
3698     bufa     = *bufa_ptr;
3699     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3700     b_otha   = b_oth->a;
3701   } else {
3702     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3703   }
3704 
3705   /* a-array */
3706   /*---------*/
3707   /*  post receives of a-array */
3708   for (i=0; i<nrecvs; i++){
3709     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3710     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3711   }
3712   k = 0;
3713   for (i=0; i<nsends; i++){
3714     nrows = sstarts[i+1]-sstarts[i];
3715     bufA = bufa+sstartsj[i];
3716     for (j=0; j<nrows; j++) {
3717       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3718       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3719       for (l=0; l<ncols; l++){
3720         *bufA++ = vals[l];
3721       }
3722       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3723 
3724     }
3725     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3726   }
3727   /* recvs and sends of a-array are completed */
3728   i = nrecvs;
3729   while (i--) {
3730     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3731   }
3732   if (nsends) {
3733     ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);
3734   }
3735 
3736   if (scall == MAT_INITIAL_MATRIX){
3737     /* put together the new matrix */
3738     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3739 
3740     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3741     /* Since these are PETSc arrays, change flags to free them as necessary. */
3742     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3743     b_oth->freedata = PETSC_TRUE;
3744     b_oth->nonew    = 0;
3745 
3746     ierr = PetscFree(bufj);CHKERRQ(ierr);
3747     if (!startsj || !bufa_ptr){
3748       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3749       ierr = PetscFree(bufa);CHKERRQ(ierr);
3750     } else {
3751       *startsj  = sstartsj;
3752       *bufa_ptr = bufa;
3753     }
3754   }
3755   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3756 
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 /*MC
3761    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3762 
3763    Options Database Keys:
3764 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3765 
3766   Level: beginner
3767 
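   Example (a sketch): create a matrix and let the options database select this type
.vb
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);   /* run with -mat_type mpiaij */
.ve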
3768 .seealso: MatCreateMPIAIJ
3769 M*/
3770 
3771 EXTERN_C_BEGIN
3772 #undef __FUNCT__
3773 #define __FUNCT__ "MatCreate_MPIAIJ"
3774 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3775 {
3776   Mat_MPIAIJ     *b;
3777   PetscErrorCode ierr;
3778   PetscInt       i;
3779   PetscMPIInt    size;
3780 
3781   PetscFunctionBegin;
3782   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3783 
3784   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3785   B->data         = (void*)b;
3786   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3787   B->factor       = 0;
3788   B->bs           = 1;
3789   B->assembled    = PETSC_FALSE;
3790   B->mapping      = 0;
3791 
3792   B->insertmode      = NOT_SET_VALUES;
3793   b->size            = size;
3794   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3795 
3796   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3797   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3798 
3799   /* the information in the maps duplicates the information computed below, eventually
3800      we should remove the duplicate information that is not contained in the maps */
3801   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3802   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3803 
3804   /* build local table of row and column ownerships */
3805   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3806   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3807   b->cowners = b->rowners + b->size + 2;
3808   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3809   b->rowners[0] = 0;
3810   for (i=2; i<=b->size; i++) {
3811     b->rowners[i] += b->rowners[i-1];
3812   }
3813   b->rstart = b->rowners[b->rank];
3814   b->rend   = b->rowners[b->rank+1];
3815   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3816   b->cowners[0] = 0;
3817   for (i=2; i<=b->size; i++) {
3818     b->cowners[i] += b->cowners[i-1];
3819   }
3820   b->cstart = b->cowners[b->rank];
3821   b->cend   = b->cowners[b->rank+1];
3822 
3823   /* build cache for off array entries formed */
3824   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3825   b->donotstash  = PETSC_FALSE;
3826   b->colmap      = 0;
3827   b->garray      = 0;
3828   b->roworiented = PETSC_TRUE;
3829 
3830   /* stuff used for matrix vector multiply */
3831   b->lvec      = PETSC_NULL;
3832   b->Mvctx     = PETSC_NULL;
3833 
3834   /* stuff for MatGetRow() */
3835   b->rowindices   = 0;
3836   b->rowvalues    = 0;
3837   b->getrowactive = PETSC_FALSE;
3838 
3839   /* Explicitly create 2 MATSEQAIJ matrices. */
3840   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3841   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3842   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3843   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3844   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3845   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3846 
3847   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3848                                      "MatStoreValues_MPIAIJ",
3849                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3850   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3851                                      "MatRetrieveValues_MPIAIJ",
3852                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3853   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3854 				     "MatGetDiagonalBlock_MPIAIJ",
3855                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3856   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3857 				     "MatIsTranspose_MPIAIJ",
3858 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3859   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3860 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3861 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3862   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3863 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3864 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3865   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3866 				     "MatDiagonalScaleLocal_MPIAIJ",
3867 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3868   PetscFunctionReturn(0);
3869 }
3870 EXTERN_C_END
3871