xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 6f19db18ccbfac7ec73b5109dcbb5ec7c19ccf8d)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
18 at a slightly higher hash-table lookup cost; without it each processor
19 holds an order-N integer array, which is not scalable but is fast to access.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
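/*
   Illustrative lookup sketch (not compiled): this is how the map built above
   is consulted later in this file to turn a global column number gcol into a
   local off-diagonal column lcol.  Both storages keep lcol+1, so that 0 (a
   hash miss or a zeroed array entry) means "not present"; gcol and lcol are
   hypothetical names.
*/
#if 0
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
  lcol--;                        /* 0 from the table means gcol is not in B */
#else
  lcol = aij->colmap[gcol] - 1;  /* -1 means gcol is not in B */
#endif
#endif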
42 
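/*
   The two macros below are hand-inlined versions of MatSetValues_SeqAIJ() for
   the diagonal (A) and off-diagonal (B) blocks.  Each locates the column in
   the sorted row by a short binary search plus linear scan, adds to or
   overwrites an existing entry, and otherwise inserts a new one, growing the
   row storage by CHUNKSIZE entries when the preallocated space runs out
   (unless the nonew option disallows new nonzeros).
*/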
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required by the macros below */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
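/*
   Caller-side sketch (not compiled) of the path implemented above: entries in
   locally owned rows go straight into aij->A or aij->B, while entries in rows
   owned by other processes are stashed and only delivered during assembly.
   mat, grow, gcol and val are hypothetical names.
*/
#if 0
  ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* ships the stash */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif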
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERT_VALUES mode or ADD_VALUES mode */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
353   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_MUMPS)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled; if so we must
385      also disassemble ourselves, in order that we may reassemble. */
386   /*
387      if the nonzero structure of submatrix B cannot change then no processor
388      can have disassembled, so we can skip this check
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 
408   /* used by MatAXPY() */
409   a->xtoy = 0; b->xtoy = 0;
410   a->XtoY = 0; b->XtoY = 0;
411 
412 #if defined(PETSC_HAVE_SUPERLUDIST)
413   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
414   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
415 #endif
416 
417 #if defined(PETSC_HAVE_SPOOLES)
418   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
419   if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
420 #endif
421 
422 #if defined(PETSC_HAVE_MUMPS)
423   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
424   if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); }
425 #endif
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
431 int MatZeroEntries_MPIAIJ(Mat A)
432 {
433   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
434   int        ierr;
435 
436   PetscFunctionBegin;
437   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
438   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
439   PetscFunctionReturn(0);
440 }
441 
442 #undef __FUNCT__
443 #define __FUNCT__ "MatZeroRows_MPIAIJ"
444 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
445 {
446   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
447   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
448   int            *nprocs,j,idx,nsends,row;
449   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
450   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
451   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
452   MPI_Comm       comm = A->comm;
453   MPI_Request    *send_waits,*recv_waits;
454   MPI_Status     recv_status,*send_status;
455   IS             istmp;
456   PetscTruth     found;
457 
458   PetscFunctionBegin;
459   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
460   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
461 
462   /*  first count number of contributors to each processor */
463   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
464   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
465   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
466   for (i=0; i<N; i++) {
467     idx = rows[i];
468     found = PETSC_FALSE;
469     for (j=0; j<size; j++) {
470       if (idx >= owners[j] && idx < owners[j+1]) {
471         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
472       }
473     }
474     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
475   }
476   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
477 
478   /* inform other processors of number of messages and max length*/
479   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
480 
481   /* post receives:   */
482   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
483   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
484   for (i=0; i<nrecvs; i++) {
485     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
486   }
487 
488   /* do sends:
489       1) starts[i] gives the starting index in svalues for the entries
490          destined for the ith processor
491   */
492   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
493   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
494   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
495   starts[0] = 0;
496   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
497   for (i=0; i<N; i++) {
498     svalues[starts[owner[i]]++] = rows[i];
499   }
500   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
501 
502   starts[0] = 0;
503   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
504   count = 0;
505   for (i=0; i<size; i++) {
506     if (nprocs[2*i+1]) {
507       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
508     }
509   }
510   ierr = PetscFree(starts);CHKERRQ(ierr);
511 
512   base = owners[rank];
513 
514   /*  wait on receives */
515   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
516   source = lens + nrecvs;
517   count  = nrecvs; slen = 0;
518   while (count) {
519     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
520     /* unpack receives into our local space */
521     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
522     source[imdex]  = recv_status.MPI_SOURCE;
523     lens[imdex]    = n;
524     slen          += n;
525     count--;
526   }
527   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
528 
529   /* move the data into the send scatter */
530   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
531   count = 0;
532   for (i=0; i<nrecvs; i++) {
533     values = rvalues + i*nmax;
534     for (j=0; j<lens[i]; j++) {
535       lrows[count++] = values[j] - base;
536     }
537   }
538   ierr = PetscFree(rvalues);CHKERRQ(ierr);
539   ierr = PetscFree(lens);CHKERRQ(ierr);
540   ierr = PetscFree(owner);CHKERRQ(ierr);
541   ierr = PetscFree(nprocs);CHKERRQ(ierr);
542 
543   /* actually zap the local rows */
544   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
545   PetscLogObjectParent(A,istmp);
546 
547   /*
548         Zero the required rows. If the "diagonal block" of the matrix
549      is square and the user wishes to set the diagonal we use separate
550      code so that MatSetValues() is not called for each diagonal entry,
551      which would allocate new memory, causing many mallocs and slowing things down.
552 
553        Contributed by: Mathew Knepley
554   */
555   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
556   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
557   if (diag && (l->A->M == l->A->N)) {
558     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
559   } else if (diag) {
560     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
561     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
562       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
563 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
564     }
565     for (i = 0; i < slen; i++) {
566       row  = lrows[i] + rstart;
567       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
568     }
569     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
570     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
571   } else {
572     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
573   }
574   ierr = ISDestroy(istmp);CHKERRQ(ierr);
575   ierr = PetscFree(lrows);CHKERRQ(ierr);
576 
577   /* wait on sends */
578   if (nsends) {
579     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
580     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
581     ierr = PetscFree(send_status);CHKERRQ(ierr);
582   }
583   ierr = PetscFree(send_waits);CHKERRQ(ierr);
584   ierr = PetscFree(svalues);CHKERRQ(ierr);
585 
586   PetscFunctionReturn(0);
587 }
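/*
   Caller-side sketch (not compiled): rows are given by their global indices
   and a process may list rows it does not own; the routine above routes each
   index to its owner before zeroing.  A, nloc, grows and one are hypothetical
   names (A being the assembled parallel matrix).  In this version of PETSc
   MatZeroRows() takes a PetscScalar* for the diagonal value; pass PETSC_NULL
   to leave the zeroed rows without a diagonal entry.
*/
#if 0
  PetscScalar one = 1.0;
  IS          is;
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nloc,grows,&is);CHKERRQ(ierr);
  ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
  ierr = ISDestroy(is);CHKERRQ(ierr);
#endif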
588 
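/*
   MatMult_MPIAIJ() below computes yy = Ad*xx + Ao*xhat, where Ad is the local
   diagonal block (a->A), Ao the off-diagonal block (a->B) and xhat (a->lvec)
   holds the ghost values gathered by the scatter a->Mvctx; the scatter is
   started before and completed after the local product so that communication
   overlaps the local computation.
*/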
589 #undef __FUNCT__
590 #define __FUNCT__ "MatMult_MPIAIJ"
591 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
592 {
593   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
594   int        ierr,nt;
595 
596   PetscFunctionBegin;
597   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
598   if (nt != A->n) {
599     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
600   }
601   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
603   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
604   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
605   PetscFunctionReturn(0);
606 }
607 
608 #undef __FUNCT__
609 #define __FUNCT__ "MatMultAdd_MPIAIJ"
610 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
611 {
612   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
613   int        ierr;
614 
615   PetscFunctionBegin;
616   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
617   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
618   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
619   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
620   PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
625 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
626 {
627   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
628   int        ierr;
629 
630   PetscFunctionBegin;
631   /* do nondiagonal part */
632   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
633   /* send it on its way */
634   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
635   /* do local part */
636   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
637   /* receive remote parts: note this assumes the values are not actually */
638   /* inserted in yy until the next line, which holds for the current */
639   /* VecScatter implementation but may not hold in general. */
640   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
641   PetscFunctionReturn(0);
642 }
643 
644 EXTERN_C_BEGIN
645 #undef __FUNCT__
646 #define __FUNCT__ "MatIsSymmetric_MPIAIJ"
647 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
648 {
649   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
650   Mat        A = Aij->A, B,Aoff = Aij->B,Boff;
651   MatType    type;
652   IS         Me,Notme;
653   int        M,N,first,last,*notme,i, ierr;
654 
655   PetscFunctionBegin;
656 
657   /* Compatible types */
658   ierr = MatGetType(Bmat,&type); CHKERRQ(ierr);
659   ierr = PetscStrcmp(type,MATMPIAIJ,f); CHKERRQ(ierr);
660   if (!*f) SETERRQ(1,"Second matrix needs to be MPIAIJ too");
661 
662   /* Easy test: symmetric diagonal block */
663   Bij = (Mat_MPIAIJ *) Bmat->data; B = Bij->B;
664   ierr = MatIsSymmetric(A,B,f); CHKERRQ(ierr);
665   if (!*f) PetscFunctionReturn(0);
666 
667   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
668   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
669   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
670   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
671   for (i=0; i<first; i++) notme[i] = i;
672   for (i=last; i<M; i++) notme[i-last+first] = i;
673   ierr = ISCreateGeneral
674     (MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
675   ierr = ISCreateStride
676     (MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
677   ierr = MatGetSubMatrix
678     (B,Notme,Me,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Boff); CHKERRQ(ierr);
679   ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
680   ierr = MatDestroy(Boff); CHKERRQ(ierr);
681   ierr = ISDestroy(Me); CHKERRQ(ierr);
682   ierr = ISDestroy(Notme); CHKERRQ(ierr);
683 
684   PetscFunctionReturn(0);
685 }
686 EXTERN_C_END
687 
688 #undef __FUNCT__
689 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
690 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
691 {
692   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
693   int        ierr;
694 
695   PetscFunctionBegin;
696   /* do nondiagonal part */
697   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
698   /* send it on its way */
699   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
700   /* do local part */
701   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
702   /* receive remote parts: note this assumes the values are not actually */
703   /* inserted in zz until the next line, which holds for the current */
704   /* VecScatter implementation but may not hold in general. */
705   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
706   PetscFunctionReturn(0);
707 }
708 
709 /*
710   This only works correctly for square matrices where the subblock A->A is the
711    diagonal block
712 */
713 #undef __FUNCT__
714 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
715 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
716 {
717   int        ierr;
718   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
719 
720   PetscFunctionBegin;
721   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
722   if (a->rstart != a->cstart || a->rend != a->cend) {
723     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
724   }
725   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatScale_MPIAIJ"
731 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
732 {
733   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
734   int        ierr;
735 
736   PetscFunctionBegin;
737   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
738   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatDestroy_MPIAIJ"
744 int MatDestroy_MPIAIJ(Mat mat)
745 {
746   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
747   int        ierr;
748 
749   PetscFunctionBegin;
750 #if defined(PETSC_USE_LOG)
751   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
752 #endif
753   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
754   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
755   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
756   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
757 #if defined (PETSC_USE_CTABLE)
758   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
759 #else
760   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
761 #endif
762   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
763   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
764   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
765   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
766   ierr = PetscFree(aij);CHKERRQ(ierr);
767   PetscFunctionReturn(0);
768 }
769 
770 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
771 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
772 extern int MatFactorInfo_MUMPS(Mat,PetscViewer);
773 
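/*
   MatView_MPIAIJ_Binary() below writes the parallel matrix to the binary
   viewer in the following order: a four-int header (MAT_FILE_COOKIE, M, N,
   global number of nonzeros), then the row lengths of all M rows, then all
   global column indices, then all values, with each piece gathered to
   process 0 one process at a time.
*/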
774 #undef __FUNCT__
775 #define __FUNCT__ "MatView_MPIAIJ_Binary"
776 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
777 {
778   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
779   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
780   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
781   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
782   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
783   PetscScalar       *column_values;
784 
785   PetscFunctionBegin;
786   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
787   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
788   nz   = A->nz + B->nz;
789   if (rank == 0) {
790     header[0] = MAT_FILE_COOKIE;
791     header[1] = mat->M;
792     header[2] = mat->N;
793     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
794     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
795     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
796     /* get largest number of rows any processor has */
797     rlen = mat->m;
798     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
799     for (i=1; i<size; i++) {
800       rlen = PetscMax(rlen,range[i+1] - range[i]);
801     }
802   } else {
803     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
804     rlen = mat->m;
805   }
806 
807   /* load up the local row counts */
808   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
809   for (i=0; i<mat->m; i++) {
810     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
811   }
812 
813   /* store the row lengths to the file */
814   if (rank == 0) {
815     MPI_Status status;
816     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
817     for (i=1; i<size; i++) {
818       rlen = range[i+1] - range[i];
819       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
820       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
821     }
822   } else {
823     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
824   }
825   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
826 
827   /* load up the local column indices */
828   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
829   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
830   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
831   cnt  = 0;
832   for (i=0; i<mat->m; i++) {
833     for (j=B->i[i]; j<B->i[i+1]; j++) {
834       if ( (col = garray[B->j[j]]) > cstart) break;
835       column_indices[cnt++] = col;
836     }
837     for (k=A->i[i]; k<A->i[i+1]; k++) {
838       column_indices[cnt++] = A->j[k] + cstart;
839     }
840     for (; j<B->i[i+1]; j++) {
841       column_indices[cnt++] = garray[B->j[j]];
842     }
843   }
844   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
845 
846   /* store the column indices to the file */
847   if (rank == 0) {
848     MPI_Status status;
849     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
850     for (i=1; i<size; i++) {
851       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
852       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
853       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
854       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
855     }
856   } else {
857     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
858     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
859   }
860   ierr = PetscFree(column_indices);CHKERRQ(ierr);
861 
862   /* load up the local column values */
863   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
864   cnt  = 0;
865   for (i=0; i<mat->m; i++) {
866     for (j=B->i[i]; j<B->i[i+1]; j++) {
867       if ( garray[B->j[j]] > cstart) break;
868       column_values[cnt++] = B->a[j];
869     }
870     for (k=A->i[i]; k<A->i[i+1]; k++) {
871       column_values[cnt++] = A->a[k];
872     }
873     for (; j<B->i[i+1]; j++) {
874       column_values[cnt++] = B->a[j];
875     }
876   }
877   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
878 
879   /* store the column values to the file */
880   if (rank == 0) {
881     MPI_Status status;
882     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
883     for (i=1; i<size; i++) {
884       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
885       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
886       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
887       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
888     }
889   } else {
890     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
891     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
892   }
893   ierr = PetscFree(column_values);CHKERRQ(ierr);
894   PetscFunctionReturn(0);
895 }
896 
897 #undef __FUNCT__
898 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
899 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
900 {
901   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
902   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
903   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
904   PetscTruth        isdraw,isascii,flg,isbinary;
905   PetscViewer       sviewer;
906   PetscViewerFormat format;
907 
908   PetscFunctionBegin;
909   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
910   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
911   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
912   if (isascii) {
913     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
914     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
915       MatInfo info;
916       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
917       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
918       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
919       if (flg) {
920         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
921 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
922       } else {
923         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
924 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
925       }
926       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
927       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
928       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
929       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
930       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
931       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
932       PetscFunctionReturn(0);
933     } else if (format == PETSC_VIEWER_ASCII_INFO) {
934       PetscFunctionReturn(0);
935     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
936 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
937       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
938 #endif
939 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
940       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
941 #endif
942 #if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE)
943       ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr);
944 #endif
945       PetscFunctionReturn(0);
946     }
947   } else if (isbinary) {
948     if (size == 1) {
949       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
950       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
951     } else {
952       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
953     }
954     PetscFunctionReturn(0);
955   } else if (isdraw) {
956     PetscDraw  draw;
957     PetscTruth isnull;
958     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
959     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
960   }
961 
962   if (size == 1) {
963     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
964     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
965   } else {
966     /* assemble the entire matrix onto first processor. */
967     Mat         A;
968     Mat_SeqAIJ *Aloc;
969     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
970     PetscScalar *a;
971 
972     if (!rank) {
973       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
974     } else {
975       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
976     }
977     PetscLogObjectParent(mat,A);
978 
979     /* copy over the A part */
980     Aloc = (Mat_SeqAIJ*)aij->A->data;
981     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
982     row = aij->rstart;
983     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
984     for (i=0; i<m; i++) {
985       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
986       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
987     }
988     aj = Aloc->j;
989     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
990 
991     /* copy over the B part */
992     Aloc = (Mat_SeqAIJ*)aij->B->data;
993     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
994     row  = aij->rstart;
995     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
996     ct   = cols;
997     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
998     for (i=0; i<m; i++) {
999       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1000       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1001     }
1002     ierr = PetscFree(ct);CHKERRQ(ierr);
1003     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1004     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1005     /*
1006        Every processor has to call MatView() to draw the matrix since the graphics
1007        waits are synchronized across all processors that share the PetscDraw object
1008     */
1009     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1010     if (!rank) {
1011       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1012       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1013     }
1014     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1015     ierr = MatDestroy(A);CHKERRQ(ierr);
1016   }
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatView_MPIAIJ"
1022 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1023 {
1024   int        ierr;
1025   PetscTruth isascii,isdraw,issocket,isbinary;
1026 
1027   PetscFunctionBegin;
1028   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1029   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1030   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1031   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1032   if (isascii || isdraw || isbinary || issocket) {
1033     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1034   } else {
1035     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1036   }
1037   PetscFunctionReturn(0);
1038 }
1039 
1040 
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatRelax_MPIAIJ"
1044 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1045 {
1046   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1047   int          ierr;
1048   Vec          bb1;
1049   PetscScalar  mone=-1.0;
1050 
1051   PetscFunctionBegin;
1052   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1053 
1054   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1055 
1056   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1057     if (flag & SOR_ZERO_INITIAL_GUESS) {
1058       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1059       its--;
1060     }
1061 
1062     while (its--) {
1063       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1064       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1065 
1066       /* update rhs: bb1 = bb - B*x */
1067       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1068       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1069 
1070       /* local sweep */
1071       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1072       CHKERRQ(ierr);
1073     }
1074   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1075     if (flag & SOR_ZERO_INITIAL_GUESS) {
1076       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1077       its--;
1078     }
1079     while (its--) {
1080       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1081       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1082 
1083       /* update rhs: bb1 = bb - B*x */
1084       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1085       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1086 
1087       /* local sweep */
1088       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1089       CHKERRQ(ierr);
1090     }
1091   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1092     if (flag & SOR_ZERO_INITIAL_GUESS) {
1093       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1094       its--;
1095     }
1096     while (its--) {
1097       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1098       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1099 
1100       /* update rhs: bb1 = bb - B*x */
1101       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1102       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1103 
1104       /* local sweep */
1105       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1106       CHKERRQ(ierr);
1107     }
1108   } else {
1109     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1110   }
1111 
1112   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1118 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1119 {
1120   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1121   Mat        A = mat->A,B = mat->B;
1122   int        ierr;
1123   PetscReal  isend[5],irecv[5];
1124 
1125   PetscFunctionBegin;
1126   info->block_size     = 1.0;
1127   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1128   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1129   isend[3] = info->memory;  isend[4] = info->mallocs;
1130   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1131   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1132   isend[3] += info->memory;  isend[4] += info->mallocs;
1133   if (flag == MAT_LOCAL) {
1134     info->nz_used      = isend[0];
1135     info->nz_allocated = isend[1];
1136     info->nz_unneeded  = isend[2];
1137     info->memory       = isend[3];
1138     info->mallocs      = isend[4];
1139   } else if (flag == MAT_GLOBAL_MAX) {
1140     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1141     info->nz_used      = irecv[0];
1142     info->nz_allocated = irecv[1];
1143     info->nz_unneeded  = irecv[2];
1144     info->memory       = irecv[3];
1145     info->mallocs      = irecv[4];
1146   } else if (flag == MAT_GLOBAL_SUM) {
1147     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1148     info->nz_used      = irecv[0];
1149     info->nz_allocated = irecv[1];
1150     info->nz_unneeded  = irecv[2];
1151     info->memory       = irecv[3];
1152     info->mallocs      = irecv[4];
1153   }
1154   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1155   info->fill_ratio_needed = 0;
1156   info->factor_mallocs    = 0;
1157   info->rows_global       = (double)matin->M;
1158   info->columns_global    = (double)matin->N;
1159   info->rows_local        = (double)matin->m;
1160   info->columns_local     = (double)matin->N;
1161 
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatSetOption_MPIAIJ"
1167 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1168 {
1169   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1170   int        ierr;
1171 
1172   PetscFunctionBegin;
1173   switch (op) {
1174   case MAT_NO_NEW_NONZERO_LOCATIONS:
1175   case MAT_YES_NEW_NONZERO_LOCATIONS:
1176   case MAT_COLUMNS_UNSORTED:
1177   case MAT_COLUMNS_SORTED:
1178   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1179   case MAT_KEEP_ZEROED_ROWS:
1180   case MAT_NEW_NONZERO_LOCATION_ERR:
1181   case MAT_USE_INODES:
1182   case MAT_DO_NOT_USE_INODES:
1183   case MAT_IGNORE_ZERO_ENTRIES:
1184     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1185     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1186     break;
1187   case MAT_ROW_ORIENTED:
1188     a->roworiented = PETSC_TRUE;
1189     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1190     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1191     break;
1192   case MAT_ROWS_SORTED:
1193   case MAT_ROWS_UNSORTED:
1194   case MAT_YES_NEW_DIAGONALS:
1195     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1196     break;
1197   case MAT_COLUMN_ORIENTED:
1198     a->roworiented = PETSC_FALSE;
1199     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1200     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1201     break;
1202   case MAT_IGNORE_OFF_PROC_ENTRIES:
1203     a->donotstash = PETSC_TRUE;
1204     break;
1205   case MAT_NO_NEW_DIAGONALS:
1206     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1207   default:
1208     SETERRQ(PETSC_ERR_SUP,"unknown option");
1209   }
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatGetRow_MPIAIJ"
1215 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1216 {
1217   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1218   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1219   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1220   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1221   int          *cmap,*idx_p;
1222 
1223   PetscFunctionBegin;
1224   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1225   mat->getrowactive = PETSC_TRUE;
1226 
1227   if (!mat->rowvalues && (idx || v)) {
1228     /*
1229         allocate enough space to hold information from the longest row.
1230     */
1231     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1232     int     max = 1,tmp;
1233     for (i=0; i<matin->m; i++) {
1234       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1235       if (max < tmp) { max = tmp; }
1236     }
1237     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1238     mat->rowindices = (int*)(mat->rowvalues + max);
1239   }
1240 
1241   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1242   lrow = row - rstart;
1243 
1244   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1245   if (!v)   {pvA = 0; pvB = 0;}
1246   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1247   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1248   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1249   nztot = nzA + nzB;
1250 
1251   cmap  = mat->garray;
1252   if (v  || idx) {
1253     if (nztot) {
1254       /* Sort by increasing column numbers, assuming A and B already sorted */
1255       int imark = -1;
1256       if (v) {
1257         *v = v_p = mat->rowvalues;
1258         for (i=0; i<nzB; i++) {
1259           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1260           else break;
1261         }
1262         imark = i;
1263         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1264         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1265       }
1266       if (idx) {
1267         *idx = idx_p = mat->rowindices;
1268         if (imark > -1) {
1269           for (i=0; i<imark; i++) {
1270             idx_p[i] = cmap[cworkB[i]];
1271           }
1272         } else {
1273           for (i=0; i<nzB; i++) {
1274             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1275             else break;
1276           }
1277           imark = i;
1278         }
1279         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1280         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1281       }
1282     } else {
1283       if (idx) *idx = 0;
1284       if (v)   *v   = 0;
1285     }
1286   }
1287   *nz = nztot;
1288   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1289   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 #undef __FUNCT__
1294 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1295 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1296 {
1297   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1298 
1299   PetscFunctionBegin;
1300   if (aij->getrowactive == PETSC_FALSE) {
1301     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1302   }
1303   aij->getrowactive = PETSC_FALSE;
1304   PetscFunctionReturn(0);
1305 }
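/*
   Caller-side sketch (not compiled): only locally owned rows may be queried,
   and each MatGetRow() must be matched by MatRestoreRow() before the next row
   is requested, since the merged row is staged in the single
   rowvalues/rowindices workspace allocated above.  rstart, rend, i, ncols,
   cols and vals are hypothetical names.
*/
#if 0
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatGetRow(mat,i,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* use ncols, cols[] and vals[] for global row i here */
    ierr = MatRestoreRow(mat,i,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
#endif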
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatNorm_MPIAIJ"
1309 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1310 {
1311   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1313   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1314   PetscReal    sum = 0.0;
1315   PetscScalar  *v;
1316 
1317   PetscFunctionBegin;
1318   if (aij->size == 1) {
1319     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1320   } else {
1321     if (type == NORM_FROBENIUS) {
1322       v = amat->a;
1323       for (i=0; i<amat->nz; i++) {
1324 #if defined(PETSC_USE_COMPLEX)
1325         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1326 #else
1327         sum += (*v)*(*v); v++;
1328 #endif
1329       }
1330       v = bmat->a;
1331       for (i=0; i<bmat->nz; i++) {
1332 #if defined(PETSC_USE_COMPLEX)
1333         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1334 #else
1335         sum += (*v)*(*v); v++;
1336 #endif
1337       }
1338       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1339       *norm = sqrt(*norm);
1340     } else if (type == NORM_1) { /* max column norm */
1341       PetscReal *tmp,*tmp2;
1342       int    *jj,*garray = aij->garray;
1343       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1344       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1345       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1346       *norm = 0.0;
1347       v = amat->a; jj = amat->j;
1348       for (j=0; j<amat->nz; j++) {
1349         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1350       }
1351       v = bmat->a; jj = bmat->j;
1352       for (j=0; j<bmat->nz; j++) {
1353         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1354       }
1355       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1356       for (j=0; j<mat->N; j++) {
1357         if (tmp2[j] > *norm) *norm = tmp2[j];
1358       }
1359       ierr = PetscFree(tmp);CHKERRQ(ierr);
1360       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1361     } else if (type == NORM_INFINITY) { /* max row norm */
1362       PetscReal ntemp = 0.0;
1363       for (j=0; j<aij->A->m; j++) {
1364         v = amat->a + amat->i[j] + shift;
1365         sum = 0.0;
1366         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1367           sum += PetscAbsScalar(*v); v++;
1368         }
1369         v = bmat->a + bmat->i[j] + shift;
1370         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1371           sum += PetscAbsScalar(*v); v++;
1372         }
1373         if (sum > ntemp) ntemp = sum;
1374       }
1375       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1376     } else {
1377       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1378     }
1379   }
1380   PetscFunctionReturn(0);
1381 }
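
/*
   Usage sketch added for illustration (not part of the original source):
   MatNorm_MPIAIJ() above supports the Frobenius, one, and infinity norms of a
   parallel AIJ matrix; the two norm is not supported.  The helper name below
   is hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatNormUsageSketch"
static int MatNormUsageSketch(Mat mat)
{
  int       ierr;
  PetscReal fnorm,onenorm,infnorm;

  PetscFunctionBegin;
  ierr = MatNorm(mat,NORM_FROBENIUS,&fnorm);CHKERRQ(ierr);  /* sqrt of sum of squares */
  ierr = MatNorm(mat,NORM_1,&onenorm);CHKERRQ(ierr);        /* max column sum */
  ierr = MatNorm(mat,NORM_INFINITY,&infnorm);CHKERRQ(ierr); /* max row sum */
  PetscFunctionReturn(0);
}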
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatTranspose_MPIAIJ"
1385 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1386 {
1387   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1388   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1389   int          ierr,shift = Aloc->indexshift;
1390   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1391   Mat          B;
1392   PetscScalar  *array;
1393 
1394   PetscFunctionBegin;
1395   if (!matout && M != N) {
1396     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1397   }
1398 
1399   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1400 
1401   /* copy over the A part */
1402   Aloc = (Mat_SeqAIJ*)a->A->data;
1403   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1404   row = a->rstart;
1405   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1406   for (i=0; i<m; i++) {
1407     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1408     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1409   }
1410   aj = Aloc->j;
1411   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1412 
1413   /* copy over the B part */
1414   Aloc = (Mat_SeqAIJ*)a->B->data;
1415   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1416   row  = a->rstart;
1417   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1418   ct   = cols;
1419   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1420   for (i=0; i<m; i++) {
1421     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1422     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1423   }
1424   ierr = PetscFree(ct);CHKERRQ(ierr);
1425   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1426   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1427   if (matout) {
1428     *matout = B;
1429   } else {
1430     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1431   }
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1437 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1438 {
1439   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1440   Mat        a = aij->A,b = aij->B;
1441   int        ierr,s1,s2,s3;
1442 
1443   PetscFunctionBegin;
1444   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1445   if (rr) {
1446     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1447     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1448     /* Overlap communication with computation. */
1449     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1450   }
1451   if (ll) {
1452     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1453     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1454     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1455   }
1456   /* scale  the diagonal block */
1457   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1458 
1459   if (rr) {
1460     /* Do a scatter end and then right scale the off-diagonal block */
1461     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1462     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1463   }
1464 
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1471 int MatPrintHelp_MPIAIJ(Mat A)
1472 {
1473   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1474   int        ierr;
1475 
1476   PetscFunctionBegin;
1477   if (!a->rank) {
1478     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1479   }
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 #undef __FUNCT__
1484 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1485 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1486 {
1487   PetscFunctionBegin;
1488   *bs = 1;
1489   PetscFunctionReturn(0);
1490 }
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1493 int MatSetUnfactored_MPIAIJ(Mat A)
1494 {
1495   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1496   int        ierr;
1497 
1498   PetscFunctionBegin;
1499   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatEqual_MPIAIJ"
1505 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1506 {
1507   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1508   Mat        a,b,c,d;
1509   PetscTruth flg;
1510   int        ierr;
1511 
1512   PetscFunctionBegin;
1513   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1514   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1515   a = matA->A; b = matA->B;
1516   c = matB->A; d = matB->B;
1517 
1518   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1519   if (flg == PETSC_TRUE) {
1520     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1521   }
1522   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatCopy_MPIAIJ"
1528 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1529 {
1530   int        ierr;
1531   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1532   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1533   PetscTruth flg;
1534 
1535   PetscFunctionBegin;
1536   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1537   if (str != SAME_NONZERO_PATTERN || !flg) {
1538     /* Because of the column compression in the off-processor part of the matrix a->B,
1539        the number of columns in a->B and b->B may differ; hence we cannot call
1540        MatCopy() directly on the two parts. If need be, a copy more efficient than
1541        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1542        then copying the submatrices. */
1543     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1544   } else {
1545     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1546     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1547   }
1548   PetscFunctionReturn(0);
1549 }
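
/*
   Usage sketch added for illustration (not part of the original source):
   MatCopy_MPIAIJ() above copies the diagonal and off-diagonal blocks directly
   only when both matrices are MPIAIJ and share the same nonzero pattern;
   otherwise it falls back to MatCopy_Basic().  The helper name below is
   hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCopyUsageSketch"
static int MatCopyUsageSketch(Mat A,Mat B)
{
  int ierr;

  PetscFunctionBegin;
  /* fast block-by-block path, valid only when A and B have identical nonzero patterns */
  ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}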
1550 
1551 #undef __FUNCT__
1552 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1553 int MatSetUpPreallocation_MPIAIJ(Mat A)
1554 {
1555   int        ierr;
1556 
1557   PetscFunctionBegin;
1558   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1563 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1564 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1565 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1566 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1567 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1568 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1569 #endif
1570 
1571 #include "petscblaslapack.h"
1572 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure);
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatAXPY_MPIAIJ"
1575 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1576 {
1577   int        ierr,one=1,i;
1578   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1579   Mat_SeqAIJ *x,*y;
1580 
1581   PetscFunctionBegin;
1582   if (str == SAME_NONZERO_PATTERN) {
1583     x = (Mat_SeqAIJ *)xx->A->data;
1584     y = (Mat_SeqAIJ *)yy->A->data;
1585     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1586     x = (Mat_SeqAIJ *)xx->B->data;
1587     y = (Mat_SeqAIJ *)yy->B->data;
1588     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1589   } else if (str == SUBSET_NONZERO_PATTERN) {
1590     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1591 
1592     x = (Mat_SeqAIJ *)xx->B->data;
1593     y = (Mat_SeqAIJ *)yy->B->data;
1594     if (y->xtoy && y->XtoY != xx->B) {
1595       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1596       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1597     }
1598     if (!y->xtoy) { /* get xtoy */
1599       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1600       y->XtoY = xx->B;
1601     }
1602     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1603   } else {
1604     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1605   }
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 /* -------------------------------------------------------------------*/
1610 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1611        MatGetRow_MPIAIJ,
1612        MatRestoreRow_MPIAIJ,
1613        MatMult_MPIAIJ,
1614        MatMultAdd_MPIAIJ,
1615        MatMultTranspose_MPIAIJ,
1616        MatMultTransposeAdd_MPIAIJ,
1617        0,
1618        0,
1619        0,
1620        0,
1621        0,
1622        0,
1623        MatRelax_MPIAIJ,
1624        MatTranspose_MPIAIJ,
1625        MatGetInfo_MPIAIJ,
1626        MatEqual_MPIAIJ,
1627        MatGetDiagonal_MPIAIJ,
1628        MatDiagonalScale_MPIAIJ,
1629        MatNorm_MPIAIJ,
1630        MatAssemblyBegin_MPIAIJ,
1631        MatAssemblyEnd_MPIAIJ,
1632        0,
1633        MatSetOption_MPIAIJ,
1634        MatZeroEntries_MPIAIJ,
1635        MatZeroRows_MPIAIJ,
1636 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1637        MatLUFactorSymbolic_MPIAIJ_TFS,
1638 #else
1639        0,
1640 #endif
1641        0,
1642        0,
1643        0,
1644        MatSetUpPreallocation_MPIAIJ,
1645        0,
1646        0,
1647        0,
1648        0,
1649        MatDuplicate_MPIAIJ,
1650        0,
1651        0,
1652        0,
1653        0,
1654        MatAXPY_MPIAIJ,
1655        MatGetSubMatrices_MPIAIJ,
1656        MatIncreaseOverlap_MPIAIJ,
1657        MatGetValues_MPIAIJ,
1658        MatCopy_MPIAIJ,
1659        MatPrintHelp_MPIAIJ,
1660        MatScale_MPIAIJ,
1661        0,
1662        0,
1663        0,
1664        MatGetBlockSize_MPIAIJ,
1665        0,
1666        0,
1667        0,
1668        0,
1669        MatFDColoringCreate_MPIAIJ,
1670        0,
1671        MatSetUnfactored_MPIAIJ,
1672        0,
1673        0,
1674        MatGetSubMatrix_MPIAIJ,
1675        MatDestroy_MPIAIJ,
1676        MatView_MPIAIJ,
1677        MatGetPetscMaps_Petsc,
1678        0,
1679        0,
1680        0,
1681        0,
1682        0,
1683        0,
1684        0,
1685        0,
1686        MatSetColoring_MPIAIJ,
1687        MatSetValuesAdic_MPIAIJ,
1688        MatSetValuesAdifor_MPIAIJ
1689 };
1690 
1691 /* ----------------------------------------------------------------------------------------*/
1692 
1693 EXTERN_C_BEGIN
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1696 int MatStoreValues_MPIAIJ(Mat mat)
1697 {
1698   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1699   int        ierr;
1700 
1701   PetscFunctionBegin;
1702   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1703   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1704   PetscFunctionReturn(0);
1705 }
1706 EXTERN_C_END
1707 
1708 EXTERN_C_BEGIN
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1711 int MatRetrieveValues_MPIAIJ(Mat mat)
1712 {
1713   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1714   int        ierr;
1715 
1716   PetscFunctionBegin;
1717   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1718   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1719   PetscFunctionReturn(0);
1720 }
1721 EXTERN_C_END
1722 
1723 #include "petscpc.h"
1724 EXTERN_C_BEGIN
1725 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1726 EXTERN_C_END
1727 
1728 EXTERN_C_BEGIN
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatCreate_MPIAIJ"
1731 int MatCreate_MPIAIJ(Mat B)
1732 {
1733   Mat_MPIAIJ *b;
1734   int        ierr,i,size;
1735 
1736   PetscFunctionBegin;
1737   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1738 
1739   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1740   B->data         = (void*)b;
1741   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1742   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1743   B->factor       = 0;
1744   B->assembled    = PETSC_FALSE;
1745   B->mapping      = 0;
1746 
1747   B->insertmode      = NOT_SET_VALUES;
1748   b->size            = size;
1749   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1750 
1751   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1752   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1753 
1754   /* the information in the maps duplicates the information computed below, eventually
1755      we should remove the duplicate information that is not contained in the maps */
1756   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1757   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1758 
1759   /* build local table of row and column ownerships */
1760   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1761   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1762   b->cowners = b->rowners + b->size + 2;
1763   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1764   b->rowners[0] = 0;
1765   for (i=2; i<=b->size; i++) {
1766     b->rowners[i] += b->rowners[i-1];
1767   }
1768   b->rstart = b->rowners[b->rank];
1769   b->rend   = b->rowners[b->rank+1];
1770   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1771   b->cowners[0] = 0;
1772   for (i=2; i<=b->size; i++) {
1773     b->cowners[i] += b->cowners[i-1];
1774   }
1775   b->cstart = b->cowners[b->rank];
1776   b->cend   = b->cowners[b->rank+1];
1777 
1778   /* build cache for off array entries formed */
1779   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1780   b->donotstash  = PETSC_FALSE;
1781   b->colmap      = 0;
1782   b->garray      = 0;
1783   b->roworiented = PETSC_TRUE;
1784 
1785   /* stuff used for matrix vector multiply */
1786   b->lvec      = PETSC_NULL;
1787   b->Mvctx     = PETSC_NULL;
1788 
1789   /* stuff for MatGetRow() */
1790   b->rowindices   = 0;
1791   b->rowvalues    = 0;
1792   b->getrowactive = PETSC_FALSE;
1793 
1794   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1795                                      "MatStoreValues_MPIAIJ",
1796                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1797   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1798                                      "MatRetrieveValues_MPIAIJ",
1799                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1800   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1801 				     "MatGetDiagonalBlock_MPIAIJ",
1802                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1803   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1804 				     "MatIsSymmetric_MPIAIJ",
1805 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1806   PetscFunctionReturn(0);
1807 }
1808 EXTERN_C_END
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1812 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1813 {
1814   Mat        mat;
1815   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1816   int        ierr;
1817 
1818   PetscFunctionBegin;
1819   *newmat       = 0;
1820   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1821   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1822   a    = (Mat_MPIAIJ*)mat->data;
1823   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1824   mat->factor       = matin->factor;
1825   mat->assembled    = PETSC_TRUE;
1826   mat->insertmode   = NOT_SET_VALUES;
1827   mat->preallocated = PETSC_TRUE;
1828 
1829   a->rstart       = oldmat->rstart;
1830   a->rend         = oldmat->rend;
1831   a->cstart       = oldmat->cstart;
1832   a->cend         = oldmat->cend;
1833   a->size         = oldmat->size;
1834   a->rank         = oldmat->rank;
1835   a->donotstash   = oldmat->donotstash;
1836   a->roworiented  = oldmat->roworiented;
1837   a->rowindices   = 0;
1838   a->rowvalues    = 0;
1839   a->getrowactive = PETSC_FALSE;
1840 
1841   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1842   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1843   if (oldmat->colmap) {
1844 #if defined (PETSC_USE_CTABLE)
1845     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1846 #else
1847     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1848     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1849     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1850 #endif
1851   } else a->colmap = 0;
1852   if (oldmat->garray) {
1853     int len;
1854     len  = oldmat->B->n;
1855     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1856     PetscLogObjectMemory(mat,len*sizeof(int));
1857     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1858   } else a->garray = 0;
1859 
1860   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1861   PetscLogObjectParent(mat,a->lvec);
1862   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1863   PetscLogObjectParent(mat,a->Mvctx);
1864   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1865   PetscLogObjectParent(mat,a->A);
1866   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1867   PetscLogObjectParent(mat,a->B);
1868   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1869   *newmat = mat;
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 #include "petscsys.h"
1874 
1875 EXTERN_C_BEGIN
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatLoad_MPIAIJ"
1878 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1879 {
1880   Mat          A;
1881   PetscScalar  *vals,*svals;
1882   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1883   MPI_Status   status;
1884   int          i,nz,ierr,j,rstart,rend,fd;
1885   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1886   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1887   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1888 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS)
1889   PetscTruth   flag;
1890 #endif
1891 
1892   PetscFunctionBegin;
1893   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1894   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1895   if (!rank) {
1896     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1897     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1898     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1899     if (header[3] < 0) {
1900       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1901     }
1902   }
1903 
1904   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1905   M = header[1]; N = header[2];
1906   /* determine ownership of all rows */
1907   m = M/size + ((M % size) > rank);
1908   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1909   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1910   rowners[0] = 0;
1911   for (i=2; i<=size; i++) {
1912     rowners[i] += rowners[i-1];
1913   }
1914   rstart = rowners[rank];
1915   rend   = rowners[rank+1];
1916 
1917   /* distribute row lengths to all processors */
1918   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1919   offlens = ourlens + (rend-rstart);
1920   if (!rank) {
1921     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1922     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1923     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1924     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1925     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1926     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1927   } else {
1928     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1929   }
1930 
1931   if (!rank) {
1932     /* calculate the number of nonzeros on each processor */
1933     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1934     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1935     for (i=0; i<size; i++) {
1936       for (j=rowners[i]; j< rowners[i+1]; j++) {
1937         procsnz[i] += rowlengths[j];
1938       }
1939     }
1940     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1941 
1942     /* determine max buffer needed and allocate it */
1943     maxnz = 0;
1944     for (i=0; i<size; i++) {
1945       maxnz = PetscMax(maxnz,procsnz[i]);
1946     }
1947     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1948 
1949     /* read in my part of the matrix column indices  */
1950     nz   = procsnz[0];
1951     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1952     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1953 
1954     /* read in everyone else's part and ship it off */
1955     for (i=1; i<size; i++) {
1956       nz   = procsnz[i];
1957       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1958       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1959     }
1960     ierr = PetscFree(cols);CHKERRQ(ierr);
1961   } else {
1962     /* determine buffer space needed for message */
1963     nz = 0;
1964     for (i=0; i<m; i++) {
1965       nz += ourlens[i];
1966     }
1967     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1968 
1969     /* receive message of column indices*/
1970     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1971     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1972     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
1973   }
1974 
1975   /* determine column ownership if matrix is not square */
1976   if (N != M) {
1977     n      = N/size + ((N % size) > rank);
1978     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1979     cstart = cend - n;
1980   } else {
1981     cstart = rstart;
1982     cend   = rend;
1983     n      = cend - cstart;
1984   }
1985 
1986   /* loop over local rows, determining number of off diagonal entries */
1987   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1988   jj = 0;
1989   for (i=0; i<m; i++) {
1990     for (j=0; j<ourlens[i]; j++) {
1991       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1992       jj++;
1993     }
1994   }
1995 
1996   /* create our matrix */
1997   for (i=0; i<m; i++) {
1998     ourlens[i] -= offlens[i];
1999   }
2000   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
2001   A = *newmat;
2002   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2003   for (i=0; i<m; i++) {
2004     ourlens[i] += offlens[i];
2005   }
2006 
2007   if (!rank) {
2008     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2009 
2010     /* read in my part of the matrix numerical values  */
2011     nz   = procsnz[0];
2012     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2013 
2014     /* insert into matrix */
2015     jj      = rstart;
2016     smycols = mycols;
2017     svals   = vals;
2018     for (i=0; i<m; i++) {
2019       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2020       smycols += ourlens[i];
2021       svals   += ourlens[i];
2022       jj++;
2023     }
2024 
2025     /* read in other processors and ship out */
2026     for (i=1; i<size; i++) {
2027       nz   = procsnz[i];
2028       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2029       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2030     }
2031     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2032   } else {
2033     /* receive numeric values */
2034     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2035 
2036     /* receive message of values*/
2037     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2038     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2039     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2040 
2041     /* insert into matrix */
2042     jj      = rstart;
2043     smycols = mycols;
2044     svals   = vals;
2045     for (i=0; i<m; i++) {
2046       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2047       smycols += ourlens[i];
2048       svals   += ourlens[i];
2049       jj++;
2050     }
2051   }
2052   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2053   ierr = PetscFree(vals);CHKERRQ(ierr);
2054   ierr = PetscFree(mycols);CHKERRQ(ierr);
2055   ierr = PetscFree(rowners);CHKERRQ(ierr);
2056 
2057   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2058   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2059 #if defined(PETSC_HAVE_SPOOLES)
2060   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
2061   if (flag) {
2062     if (size == 1) {
2063       ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
2064     } else {
2065       ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
2066     }
2067   }
2068 #endif
2069 #if defined(PETSC_HAVE_SUPERLUDIST)
2070   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
2071   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
2072 #endif
2073 #if defined(PETSC_HAVE_MUMPS)
2074   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
2075   if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
2076 #endif
2077   PetscFunctionReturn(0);
2078 }
2079 EXTERN_C_END
2080 
2081 #undef __FUNCT__
2082 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2083 /*
2084     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2085   copy, and then the final result formed by concatenating the local matrices.
2086   Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
2087 */
2088 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2089 {
2090   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2091   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2092   Mat          *local,M,Mreuse;
2093   PetscScalar  *vwork,*aa;
2094   MPI_Comm     comm = mat->comm;
2095   Mat_SeqAIJ   *aij;
2096 
2097 
2098   PetscFunctionBegin;
2099   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2100   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2101 
2102   if (call ==  MAT_REUSE_MATRIX) {
2103     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2104     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2105     local = &Mreuse;
2106     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2107   } else {
2108     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2109     Mreuse = *local;
2110     ierr   = PetscFree(local);CHKERRQ(ierr);
2111   }
2112 
2113   /*
2114       m - number of local rows
2115       n - number of columns (same on all processors)
2116       rstart - first row in new global matrix generated
2117   */
2118   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2119   if (call == MAT_INITIAL_MATRIX) {
2120     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2121     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2122     ii  = aij->i;
2123     jj  = aij->j;
2124 
2125     /*
2126         Determine the number of non-zeros in the diagonal and off-diagonal
2127         portions of the matrix in order to do correct preallocation
2128     */
2129 
2130     /* first get start and end of "diagonal" columns */
2131     if (csize == PETSC_DECIDE) {
2132       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2133       if (mglobal == n) { /* square matrix */
2134 	nlocal = m;
2135       } else {
2136         nlocal = n/size + ((n % size) > rank);
2137       }
2138     } else {
2139       nlocal = csize;
2140     }
2141     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2142     rstart = rend - nlocal;
2143     if (rank == size - 1 && rend != n) {
2144       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2145     }
2146 
2147     /* next, compute all the lengths */
2148     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2149     olens = dlens + m;
2150     for (i=0; i<m; i++) {
2151       jend = ii[i+1] - ii[i];
2152       olen = 0;
2153       dlen = 0;
2154       for (j=0; j<jend; j++) {
2155         if (*jj < rstart || *jj >= rend) olen++;
2156         else dlen++;
2157         jj++;
2158       }
2159       olens[i] = olen;
2160       dlens[i] = dlen;
2161     }
2162     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2163     ierr = PetscFree(dlens);CHKERRQ(ierr);
2164   } else {
2165     int ml,nl;
2166 
2167     M = *newmat;
2168     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2169     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2170     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2171     /*
2172          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2173        rather than the slower MatSetValues().
2174     */
2175     M->was_assembled = PETSC_TRUE;
2176     M->assembled     = PETSC_FALSE;
2177   }
2178   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2179   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2180   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2181   ii  = aij->i;
2182   jj  = aij->j;
2183   aa  = aij->a;
2184   for (i=0; i<m; i++) {
2185     row   = rstart + i;
2186     nz    = ii[i+1] - ii[i];
2187     cwork = jj;     jj += nz;
2188     vwork = aa;     aa += nz;
2189     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2190   }
2191 
2192   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2193   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2194   *newmat = M;
2195 
2196   /* save submatrix used in processor for next request */
2197   if (call ==  MAT_INITIAL_MATRIX) {
2198     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2199     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2200   }
2201 
2202   PetscFunctionReturn(0);
2203 }
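
/*
   Usage sketch added for illustration (not part of the original source),
   assuming the public MatGetSubMatrix() wrapper takes the same argument list
   as the implementation above and that ISCreateStride()/ISDestroy() are
   available in this PETSc version: the first extraction uses MAT_INITIAL_MATRIX,
   and later extractions with the same index sets and layout may pass
   MAT_REUSE_MATRIX to reuse the cached local submatrix.  The helper name below
   is hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrixUsageSketch"
static int MatGetSubMatrixUsageSketch(Mat mat,int rstart,int nrows,int ncols)
{
  int ierr;
  IS  isrow,iscol;
  Mat sub;

  PetscFunctionBegin;
  ierr = ISCreateStride(mat->comm,nrows,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(mat->comm,ncols,0,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
  /* ... use sub; after mat changes numerically, refresh the same submatrix in place ... */
  ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
  ierr = MatDestroy(sub);CHKERRQ(ierr);
  ierr = ISDestroy(isrow);CHKERRQ(ierr);
  ierr = ISDestroy(iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}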
2204 
2205 #undef __FUNCT__
2206 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2207 /*@C
2208    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2209    (the default parallel PETSc format).  For good matrix assembly performance
2210    the user should preallocate the matrix storage by setting the parameters
2211    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2212    performance can be increased by more than a factor of 50.
2213 
2214    Collective on MPI_Comm
2215 
2216    Input Parameters:
2217 +  A - the matrix
2218 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2219            (same value is used for all local rows)
2220 .  d_nnz - array containing the number of nonzeros in the various rows of the
2221            DIAGONAL portion of the local submatrix (possibly different for each row)
2222            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2223            The size of this array is equal to the number of local rows, i.e., 'm'.
2224            You must leave room for the diagonal entry even if it is zero.
2225 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2226            submatrix (same value is used for all local rows).
2227 -  o_nnz - array containing the number of nonzeros in the various rows of the
2228            OFF-DIAGONAL portion of the local submatrix (possibly different for
2229            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2230            structure. The size of this array is equal to the number
2231            of local rows, i.e., 'm'.
2232 
2233    The AIJ format (also called the Yale sparse matrix format or
2234    compressed row storage) is fully compatible with standard Fortran 77
2235    storage.  That is, the stored row and column indices can begin at
2236    either one (as in Fortran) or zero.  See the users manual for details.
2237 
2238    The user MUST specify either the local or global matrix dimensions
2239    (possibly both).
2240 
2241    The parallel matrix is partitioned such that the first m0 rows belong to
2242    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2243    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
2244 
2245    The DIAGONAL portion of the local submatrix of a processor can be defined
2246    as the submatrix which is obtained by extracting the part corresponding
2247    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2248    first row that belongs to the processor, and r2 is the last row belonging
2249    to this processor. This is a square mxm matrix. The remaining portion
2250    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2251 
2252    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2253 
2254    By default, this format uses inodes (identical nodes) when possible.
2255    We search for consecutive rows with the same nonzero structure, thereby
2256    reusing matrix information to achieve increased efficiency.
2257 
2258    Options Database Keys:
2259 +  -mat_aij_no_inode  - Do not use inodes
2260 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2261 -  -mat_aij_oneindex - Internally use indexing starting at 1
2262         rather than 0.  Note that when calling MatSetValues(),
2263         the user still MUST index entries starting at 0!
2264 
2265    Example usage:
2266 
2267    Consider the following 8x8 matrix with 34 non-zero values, that is
2268    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2269    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2270    as follows:
2271 
2272 .vb
2273             1  2  0  |  0  3  0  |  0  4
2274     Proc0   0  5  6  |  7  0  0  |  8  0
2275             9  0 10  | 11  0  0  | 12  0
2276     -------------------------------------
2277            13  0 14  | 15 16 17  |  0  0
2278     Proc1   0 18  0  | 19 20 21  |  0  0
2279             0  0  0  | 22 23  0  | 24  0
2280     -------------------------------------
2281     Proc2  25 26 27  |  0  0 28  | 29  0
2282            30  0  0  | 31 32 33  |  0 34
2283 .ve
2284 
2285    This can be represented as a collection of submatrices as:
2286 
2287 .vb
2288       A B C
2289       D E F
2290       G H I
2291 .ve
2292 
2293    Where the submatrices A,B,C are owned by proc0, D,E,F are
2294    owned by proc1, G,H,I are owned by proc2.
2295 
2296    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2297    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2298    The 'M','N' parameters are 8,8, and have the same values on all procs.
2299 
2300    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2301    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2302    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2303    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2304    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2305    matrix, and [DF] as another SeqAIJ matrix.
2306 
2307    When d_nz, o_nz parameters are specified, d_nz storage elements are
2308    allocated for every row of the local diagonal submatrix, and o_nz
2309    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2310    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2311    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2312    In this case, the values of d_nz,o_nz are:
2313 .vb
2314      proc0 : d_nz = 2, o_nz = 2
2315      proc1 : d_nz = 3, o_nz = 2
2316      proc2 : d_nz = 1, o_nz = 4
2317 .ve
2318    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2319    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2320    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2321    34 values.
2322 
2323    When d_nnz, o_nnz parameters are specified, the storage is specified
2324    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2325    In the above case the values for d_nnz,o_nnz are:
2326 .vb
2327      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2328      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2329      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2330 .ve
2331    Here the space allocated is the sum of all the above values, i.e., 34, and
2332    hence pre-allocation is perfect.
2333 
2334    Level: intermediate
2335 
2336 .keywords: matrix, aij, compressed row, sparse, parallel
2337 
2338 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2339 @*/
2340 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2341 {
2342   Mat_MPIAIJ   *b;
2343   int          ierr,i;
2344   PetscTruth   flg2;
2345 
2346   PetscFunctionBegin;
2347   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2348   if (!flg2) PetscFunctionReturn(0);
2349   B->preallocated = PETSC_TRUE;
2350   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2351   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2352   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2353   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2354   if (d_nnz) {
2355     for (i=0; i<B->m; i++) {
2356       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2357     }
2358   }
2359   if (o_nnz) {
2360     for (i=0; i<B->m; i++) {
2361       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2362     }
2363   }
2364   b = (Mat_MPIAIJ*)B->data;
2365 
2366   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2367   PetscLogObjectParent(B,b->A);
2368   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2369   PetscLogObjectParent(B,b->B);
2370 
2371   PetscFunctionReturn(0);
2372 }
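
/*
   Usage sketch added for illustration (not part of the original source): exact
   per-row preallocation for the 8x8 example in the manual page above, written
   as the calls the rank playing the role of proc0 (3 local rows) would make in
   a 3-process run; the other ranks would pass their own sizes and d_nnz/o_nnz
   arrays.  Since d_nnz/o_nnz are given, the scalar d_nz/o_nz arguments are
   ignored.  The helper name is hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJPreallocationSketch"
static int MatMPIAIJPreallocationSketch(MPI_Comm comm,Mat *newmat)
{
  int ierr;
  int d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2}; /* proc0 row lengths from the example */
  Mat A;

  PetscFunctionBegin;
  ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ... */
  *newmat = A;
  PetscFunctionReturn(0);
}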
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatCreateMPIAIJ"
2376 /*@C
2377    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2378    (the default parallel PETSc format).  For good matrix assembly performance
2379    the user should preallocate the matrix storage by setting the parameters
2380    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2381    performance can be increased by more than a factor of 50.
2382 
2383    Collective on MPI_Comm
2384 
2385    Input Parameters:
2386 +  comm - MPI communicator
2387 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2388            This value should be the same as the local size used in creating the
2389            y vector for the matrix-vector product y = Ax.
2390 .  n - This value should be the same as the local size used in creating the
2391        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2392        calculated if N is given) For square matrices n is almost always m.
2393 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2394 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2395 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2396            (same value is used for all local rows)
2397 .  d_nnz - array containing the number of nonzeros in the various rows of the
2398            DIAGONAL portion of the local submatrix (possibly different for each row)
2399            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2400            The size of this array is equal to the number of local rows, i.e., 'm'.
2401            You must leave room for the diagonal entry even if it is zero.
2402 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2403            submatrix (same value is used for all local rows).
2404 -  o_nnz - array containing the number of nonzeros in the various rows of the
2405            OFF-DIAGONAL portion of the local submatrix (possibly different for
2406            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2407            structure. The size of this array is equal to the number
2408            of local rows, i.e., 'm'.
2409 
2410    Output Parameter:
2411 .  A - the matrix
2412 
2413    Notes:
2414    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2415    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2416    storage requirements for this matrix.
2417 
2418    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2419    processor then it must be used on all processors that share the object for
2420    that argument.
2421 
2422    The AIJ format (also called the Yale sparse matrix format or
2423    compressed row storage), is fully compatible with standard Fortran 77
2424    storage.  That is, the stored row and column indices can begin at
2425    either one (as in Fortran) or zero.  See the users manual for details.
2426 
2427    The user MUST specify either the local or global matrix dimensions
2428    (possibly both).
2429 
2430    The parallel matrix is partitioned such that the first m0 rows belong to
2431    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2432    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
2433 
2434    The DIAGONAL portion of the local submatrix of a processor can be defined
2435    as the submatrix which is obtained by extracting the part corresponding
2436    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2437    first row that belongs to the processor, and r2 is the last row belonging
2438    to this processor. This is a square mxm matrix. The remaining portion
2439    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2440 
2441    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2442 
2443    By default, this format uses inodes (identical nodes) when possible.
2444    We search for consecutive rows with the same nonzero structure, thereby
2445    reusing matrix information to achieve increased efficiency.
2446 
2447    Options Database Keys:
2448 +  -mat_aij_no_inode  - Do not use inodes
2449 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2450 -  -mat_aij_oneindex - Internally use indexing starting at 1
2451         rather than 0.  Note that when calling MatSetValues(),
2452         the user still MUST index entries starting at 0!
2453 
2454 
2455    Example usage:
2456 
2457    Consider the following 8x8 matrix with 34 non-zero values, that is
2458    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2459    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2460    as follows:
2461 
2462 .vb
2463             1  2  0  |  0  3  0  |  0  4
2464     Proc0   0  5  6  |  7  0  0  |  8  0
2465             9  0 10  | 11  0  0  | 12  0
2466     -------------------------------------
2467            13  0 14  | 15 16 17  |  0  0
2468     Proc1   0 18  0  | 19 20 21  |  0  0
2469             0  0  0  | 22 23  0  | 24  0
2470     -------------------------------------
2471     Proc2  25 26 27  |  0  0 28  | 29  0
2472            30  0  0  | 31 32 33  |  0 34
2473 .ve
2474 
2475    This can be represented as a collection of submatrices as:
2476 
2477 .vb
2478       A B C
2479       D E F
2480       G H I
2481 .ve
2482 
2483    Where the submatrices A,B,C are owned by proc0, D,E,F are
2484    owned by proc1, G,H,I are owned by proc2.
2485 
2486    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2487    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2488    The 'M','N' parameters are 8,8, and have the same values on all procs.
2489 
2490    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2491    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2492    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2493    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2494    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2495    matrix, and [DF] as another SeqAIJ matrix.
2496 
2497    When d_nz, o_nz parameters are specified, d_nz storage elements are
2498    allocated for every row of the local diagonal submatrix, and o_nz
2499    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2500    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2501    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2502    In this case, the values of d_nz,o_nz are:
2503 .vb
2504      proc0 : d_nz = 2, o_nz = 2
2505      proc1 : d_nz = 3, o_nz = 2
2506      proc2 : d_nz = 1, o_nz = 4
2507 .ve
2508    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2509    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2510    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2511    34 values.
2512 
2513    When d_nnz, o_nnz parameters are specified, the storage is specified
2514    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2515    In the above case the values for d_nnz,o_nnz are:
2516 .vb
2517      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2518      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2519      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2520 .ve
2521    Here the space allocated is the sum of all the above values, i.e., 34, and
2522    hence pre-allocation is perfect.
2523 
2524    Level: intermediate
2525 
2526 .keywords: matrix, aij, compressed row, sparse, parallel
2527 
2528 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2529 @*/
2530 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2531 {
2532   int ierr,size;
2533 
2534   PetscFunctionBegin;
2535   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2536   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2537   if (size > 1) {
2538     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2539     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2540   } else {
2541     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2542     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2543   }
2544   PetscFunctionReturn(0);
2545 }
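
/*
   Usage sketch added for illustration (not part of the original source):
   creating a square parallel AIJ matrix with a uniform preallocation estimate
   (5 diagonal-block and 2 off-diagonal-block nonzeros per row) and assembling
   a unit diagonal on the locally owned rows.  The local sizes mlocal must sum
   to Nglobal across the communicator.  The helper name is hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJUsageSketch"
static int MatCreateMPIAIJUsageSketch(MPI_Comm comm,int mlocal,int Nglobal,Mat *newA)
{
  int         ierr,i,rstart,rend;
  PetscScalar one = 1.0;
  Mat         A;

  PetscFunctionBegin;
  ierr = MatCreateMPIAIJ(comm,mlocal,mlocal,Nglobal,Nglobal,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValues(A,1,&i,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); /* unit diagonal */
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newA = A;
  PetscFunctionReturn(0);
}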
2546 
2547 #undef __FUNCT__
2548 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2549 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2550 {
2551   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2552   PetscFunctionBegin;
2553   *Ad     = a->A;
2554   *Ao     = a->B;
2555   *colmap = a->garray;
2556   PetscFunctionReturn(0);
2557 }
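
/*
   Usage sketch added for illustration (not part of the original source):
   MatMPIAIJGetSeqAIJ() above hands back the local diagonal block, the local
   off-diagonal block, and garray, which maps the compressed local column
   indices of the off-diagonal block to global column numbers.  The helper
   name is hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMPIAIJGetSeqAIJSketch"
static int MatMPIAIJGetSeqAIJSketch(Mat A)
{
  int ierr,mB,nB,*colmap;
  Mat Ad,Ao;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
  ierr = MatGetLocalSize(Ao,&mB,&nB);CHKERRQ(ierr);
  /* local column j of Ao (0 <= j < nB) corresponds to global column colmap[j] */
  PetscFunctionReturn(0);
}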
2558 
2559 #undef __FUNCT__
2560 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2561 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2562 {
2563   int        ierr,i;
2564   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2565 
2566   PetscFunctionBegin;
2567   if (coloring->ctype == IS_COLORING_LOCAL) {
2568     ISColoringValue *allcolors,*colors;
2569     ISColoring      ocoloring;
2570 
2571     /* set coloring for diagonal portion */
2572     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2573 
2574     /* set coloring for off-diagonal portion */
2575     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2576     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2577     for (i=0; i<a->B->n; i++) {
2578       colors[i] = allcolors[a->garray[i]];
2579     }
2580     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2581     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2582     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2583     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2584   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2585     ISColoringValue *colors;
2586     int             *larray;
2587     ISColoring      ocoloring;
2588 
2589     /* set coloring for diagonal portion */
2590     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2591     for (i=0; i<a->A->n; i++) {
2592       larray[i] = i + a->cstart;
2593     }
2594     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2595     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2596     for (i=0; i<a->A->n; i++) {
2597       colors[i] = coloring->colors[larray[i]];
2598     }
2599     ierr = PetscFree(larray);CHKERRQ(ierr);
2600     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2601     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2602     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2603 
2604     /* set coloring for off-diagonal portion */
2605     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2606     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2607     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2608     for (i=0; i<a->B->n; i++) {
2609       colors[i] = coloring->colors[larray[i]];
2610     }
2611     ierr = PetscFree(larray);CHKERRQ(ierr);
2612     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2613     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2614     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2615   } else {
2616     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2617   }
2618 
2619   PetscFunctionReturn(0);
2620 }
2621 
2622 #undef __FUNCT__
2623 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2624 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2625 {
2626   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2627   int        ierr;
2628 
2629   PetscFunctionBegin;
2630   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2631   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 #undef __FUNCT__
2636 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2637 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2638 {
2639   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2640   int        ierr;
2641 
2642   PetscFunctionBegin;
2643   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2644   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2645   PetscFunctionReturn(0);
2646 }
2647 
2648 #undef __FUNCT__
2649 #define __FUNCT__ "MatMerge"
2650 /*@C
2651       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2652                  matrices from each processor
2653 
2654     Collective on MPI_Comm
2655 
2656    Input Parameters:
2657 +    comm - the communicator the parallel matrix will live on
2658 -    inmat - the input sequential matrices
2659 
2660    Output Parameter:
2661 .    outmat - the parallel matrix generated
2662 
2663     Level: advanced
2664 
2665    Notes: The number of columns of the sequential matrix on EACH of the separate
2666       processes MUST be the same.
2667 
2668 @*/
2669 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2670 {
2671   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2672   PetscScalar *values;
2673   PetscMap    columnmap,rowmap;
2674 
2675   PetscFunctionBegin;
2676 
2677   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2678 
2679   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2680   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2681   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2682   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2683   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2684   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2685 
2686   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2687   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2688   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2689   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2690   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2691 
2692   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2693   for (i=0;i<m;i++) {
2694     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2695     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2696     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2697   }
2698   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2699   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2700 
2701   for (i=0;i<m;i++) {
2702     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2703     I    = i + rstart;
2704     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2705     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2706   }
2707   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2708   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2709   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2710 
2711   PetscFunctionReturn(0);
2712 }
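
/*
   Usage sketch added for illustration (not part of the original source): each
   process assembles its own sequential slab of rows (here a hypothetical
   mlocal x N piece) and MatMerge() concatenates them into one parallel matrix.
   Note that MatMerge() destroys its input matrix.  The helper name is
   hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMergeUsageSketch"
static int MatMergeUsageSketch(MPI_Comm comm,int mlocal,int N,Mat *merged)
{
  int ierr;
  Mat local;

  PetscFunctionBegin;
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,N,5,PETSC_NULL,&local);CHKERRQ(ierr);
  /* ... MatSetValues() on the local rows ... */
  ierr = MatAssemblyBegin(local,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(local,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatMerge(comm,local,merged);CHKERRQ(ierr); /* takes ownership of local */
  PetscFunctionReturn(0);
}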
2713 
2714 #undef __FUNCT__
2715 #define __FUNCT__ "MatFileSplit"
2716 int MatFileSplit(Mat A,char *outfile)
2717 {
2718   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2719   PetscViewer out;
2720   char        *name;
2721   Mat         B;
2722   PetscScalar *values;
2723 
2724   PetscFunctionBegin;
2725 
2726   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2727   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2728   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2729   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2730   for (i=0;i<m;i++) {
2731     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2732     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2733     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2734   }
2735   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2736   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2737 
2738   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2739   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2740   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2741   sprintf(name,"%s.%d",outfile,rank);
2742   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2743   ierr = PetscFree(name);CHKERRQ(ierr);
2744   ierr = MatView(B,out);CHKERRQ(ierr);
2745   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2746   ierr = MatDestroy(B);CHKERRQ(ierr);
2747   PetscFunctionReturn(0);
2748 }
2749