xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 9abb65ff7b5c67f68494a00e2ecd82b85b89430f)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it, it is not scalable (each processor
19 has an order-N integer array) but access is fast.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
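/*
   Illustrative sketch (added comment, not in the original source): once the colmap
   exists, a global column index (call it gcol) is translated to its local index in
   the off-diagonal block the same way MatSetValues_MPIAIJ() does below; a result of
   -1 means the column is not present in the off-diagonal part.

#if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
#else
     lcol = aij->colmap[gcol] - 1;
#endif
*/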
42 
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
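/*
   Usage note (added for clarity): the two macros above are expanded inside
   MatSetValues_MPIAIJ() in place of per-entry calls to MatSetValues_SeqAIJ(),

     MatSetValues_SeqAIJ_A_Private(row,col,value,addv);    diagonal-block entry
     MatSetValues_SeqAIJ_B_Private(row,col,value,addv);    off-diagonal entry

   and rely on the local variables declared there (rp, ap, nrow, rmax, low, high,
   nonew, shift, ...) being in scope.
*/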
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required by the MatSetValues_SeqAIJ_*_Private macros */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift=0;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
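/*
   Caller-level sketch (hedged example, names are placeholders): values destined for
   rows owned by other processes are only stashed above; they reach their owners
   during assembly, so the typical pattern is

     ierr = MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/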
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
289     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
294         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERTMODE or ADDMODE */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
353   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled, if so we must
385      also disassemble ourselves, in order that we may reassemble. */
386   /*
387      if nonzero structure of submatrix B cannot change then we know that
388      no processor disassembled thus we can skip this stuff
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 
408   /* used by MatAXPY() */
409   a->xtoy = 0; b->xtoy = 0;
410   a->XtoY = 0; b->XtoY = 0;
411 
412 #if defined(PETSC_HAVE_SUPERLUDIST)
413   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
414   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
415 #endif
416 
417 #if defined(PETSC_HAVE_MUMPS)
418   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
419   if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); }
420 #endif
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
426 int MatZeroEntries_MPIAIJ(Mat A)
427 {
428   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
429   int        ierr;
430 
431   PetscFunctionBegin;
432   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
433   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
434   PetscFunctionReturn(0);
435 }
436 
437 #undef __FUNCT__
438 #define __FUNCT__ "MatZeroRows_MPIAIJ"
439 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
440 {
441   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
442   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
443   int            *nprocs,j,idx,nsends,row;
444   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
445   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
446   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
447   MPI_Comm       comm = A->comm;
448   MPI_Request    *send_waits,*recv_waits;
449   MPI_Status     recv_status,*send_status;
450   IS             istmp;
451   PetscTruth     found;
452 
453   PetscFunctionBegin;
454   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
455   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
456 
457   /*  first count number of contributors to each processor */
458   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
459   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
460   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
461   for (i=0; i<N; i++) {
462     idx = rows[i];
463     found = PETSC_FALSE;
464     for (j=0; j<size; j++) {
465       if (idx >= owners[j] && idx < owners[j+1]) {
466         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
467       }
468     }
469     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
470   }
471   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
472 
473   /* inform other processors of number of messages and max length*/
474   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
475 
476   /* post receives:   */
477   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
478   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
479   for (i=0; i<nrecvs; i++) {
480     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
481   }
482 
483   /* do sends:
484       1) starts[i] gives the starting index in svalues for stuff going to
485          the ith processor
486   */
487   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
488   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
489   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
490   starts[0] = 0;
491   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
492   for (i=0; i<N; i++) {
493     svalues[starts[owner[i]]++] = rows[i];
494   }
495   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
496 
497   starts[0] = 0;
498   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
499   count = 0;
500   for (i=0; i<size; i++) {
501     if (nprocs[2*i+1]) {
502       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
503     }
504   }
505   ierr = PetscFree(starts);CHKERRQ(ierr);
506 
507   base = owners[rank];
508 
509   /*  wait on receives */
510   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
511   source = lens + nrecvs;
512   count  = nrecvs; slen = 0;
513   while (count) {
514     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
515     /* unpack receives into our local space */
516     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
517     source[imdex]  = recv_status.MPI_SOURCE;
518     lens[imdex]    = n;
519     slen          += n;
520     count--;
521   }
522   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
523 
524   /* move the data into the send scatter */
525   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
526   count = 0;
527   for (i=0; i<nrecvs; i++) {
528     values = rvalues + i*nmax;
529     for (j=0; j<lens[i]; j++) {
530       lrows[count++] = values[j] - base;
531     }
532   }
533   ierr = PetscFree(rvalues);CHKERRQ(ierr);
534   ierr = PetscFree(lens);CHKERRQ(ierr);
535   ierr = PetscFree(owner);CHKERRQ(ierr);
536   ierr = PetscFree(nprocs);CHKERRQ(ierr);
537 
538   /* actually zap the local rows */
539   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
540   PetscLogObjectParent(A,istmp);
541 
542   /*
543         Zero the required rows. If the "diagonal block" of the matrix
544      is square and the user wishes to set the diagonal we use separate
545      code so that MatSetValues() is not called for each diagonal entry, which would
546      allocate new memory and trigger many mallocs, slowing things down.
547 
548        Contributed by: Matthew Knepley
549   */
550   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
551   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
552   if (diag && (l->A->M == l->A->N)) {
553     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
554   } else if (diag) {
555     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
556     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
557       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
558 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
559     }
560     for (i = 0; i < slen; i++) {
561       row  = lrows[i] + rstart;
562       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
563     }
564     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
565     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
566   } else {
567     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
568   }
569   ierr = ISDestroy(istmp);CHKERRQ(ierr);
570   ierr = PetscFree(lrows);CHKERRQ(ierr);
571 
572   /* wait on sends */
573   if (nsends) {
574     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
575     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
576     ierr = PetscFree(send_status);CHKERRQ(ierr);
577   }
578   ierr = PetscFree(send_waits);CHKERRQ(ierr);
579   ierr = PetscFree(svalues);CHKERRQ(ierr);
580 
581   PetscFunctionReturn(0);
582 }
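/*
   Usage sketch (hedged example, names are placeholders): each process lists the
   global rows it wants zeroed in an IS; a non-null diag puts that value on the
   diagonal of the zeroed rows.

     PetscScalar one = 1.0;
     ierr = ISCreateGeneral(comm,nlocal,globalrows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/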
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatMult_MPIAIJ"
586 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
587 {
588   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
589   int        ierr,nt;
590 
591   PetscFunctionBegin;
592   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
593   if (nt != A->n) {
594     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
595   }
596   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
597   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
598   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
600   PetscFunctionReturn(0);
601 }
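/*
   In effect each process computes

     y_local = A_diag * x_local + A_off * x_ghost

   where the ghost values x_ghost (a->lvec) are gathered with VecScatterBegin/End,
   overlapping the communication with the local A_diag multiply.
*/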
602 
603 #undef __FUNCT__
604 #define __FUNCT__ "MatMultAdd_MPIAIJ"
605 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
606 {
607   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
608   int        ierr;
609 
610   PetscFunctionBegin;
611   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
612   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
613   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
614   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
615   PetscFunctionReturn(0);
616 }
617 
618 #undef __FUNCT__
619 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
620 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
621 {
622   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
623   int        ierr;
624 
625   PetscFunctionBegin;
626   /* do nondiagonal part */
627   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
628   /* send it on its way */
629   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
630   /* do local part */
631   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
632   /* receive remote parts: note this assumes the values are not actually */
633   /* inserted into yy until VecScatterEnd() below, which is true for this */
634   /* implementation but is not necessarily true in general. */
635   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
636   PetscFunctionReturn(0);
637 }
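/*
   Per process this forms

     y = A_diag^T * x_local + (reverse-scatter ADD of A_off^T * x_local)

   i.e. the off-diagonal contribution is computed locally into a->lvec and then
   added into the owning processes' portions of y by the reverse scatter.
*/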
638 
639 EXTERN_C_BEGIN
640 #undef __FUNCT__
641 #define __FUNCT__ "MatIsSymmetric_MPIAIJ"
642 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
643 {
644   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
645   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
646   IS         Me,Notme;
647   int        M,N,first,last,*notme,i, ierr;
648 
649   PetscFunctionBegin;
650 
651   /* Easy test: symmetric diagonal block */
652   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
653   ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr);
654   if (!*f) PetscFunctionReturn(0);
655 
656   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
657   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
658   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
659   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
660   for (i=0; i<first; i++) notme[i] = i;
661   for (i=last; i<M; i++) notme[i-last+first] = i;
662   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
663   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
664   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
665   Aoff = Aoffs[0];
666   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
667   Boff = Boffs[0];
668   ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
669   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
670   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
671   ierr = ISDestroy(Me); CHKERRQ(ierr);
672   ierr = ISDestroy(Notme); CHKERRQ(ierr);
673 
674   PetscFunctionReturn(0);
675 }
676 EXTERN_C_END
677 
678 #undef __FUNCT__
679 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
680 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
681 {
682   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
683   int        ierr;
684 
685   PetscFunctionBegin;
686   /* do nondiagonal part */
687   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
688   /* send it on its way */
689   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
690   /* do local part */
691   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
692   /* receive remote parts: note this assumes the values are not actually */
693   /* inserted into zz until VecScatterEnd() below, which is true for this */
694   /* implementation but is not necessarily true in general. */
695   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
696   PetscFunctionReturn(0);
697 }
698 
699 /*
700   This only works correctly for square matrices where the subblock A->A is the
701    diagonal block
702 */
703 #undef __FUNCT__
704 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
705 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
706 {
707   int        ierr;
708   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
709 
710   PetscFunctionBegin;
711   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
712   if (a->rstart != a->cstart || a->rend != a->cend) {
713     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
714   }
715   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 
719 #undef __FUNCT__
720 #define __FUNCT__ "MatScale_MPIAIJ"
721 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
722 {
723   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
724   int        ierr;
725 
726   PetscFunctionBegin;
727   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
728   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatDestroy_MPIAIJ"
734 int MatDestroy_MPIAIJ(Mat mat)
735 {
736   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
737   int        ierr;
738 
739   PetscFunctionBegin;
740 #if defined(PETSC_USE_LOG)
741   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
742 #endif
743   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
744   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
745   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
746   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
747 #if defined (PETSC_USE_CTABLE)
748   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
749 #else
750   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
751 #endif
752   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
753   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
754   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
755   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
756   ierr = PetscFree(aij);CHKERRQ(ierr);
757   PetscFunctionReturn(0);
758 }
759 
760 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
761 extern int MatFactorInfo_MUMPS(Mat,PetscViewer);
762 
763 #undef __FUNCT__
764 #define __FUNCT__ "MatView_MPIAIJ_Binary"
765 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
766 {
767   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
768   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
769   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
770   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
771   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
772   PetscScalar       *column_values;
773 
774   PetscFunctionBegin;
775   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
776   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
777   nz   = A->nz + B->nz;
778   if (rank == 0) {
779     header[0] = MAT_FILE_COOKIE;
780     header[1] = mat->M;
781     header[2] = mat->N;
782     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
783     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
784     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
785     /* get largest number of rows any processor has */
786     rlen = mat->m;
787     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
788     for (i=1; i<size; i++) {
789       rlen = PetscMax(rlen,range[i+1] - range[i]);
790     }
791   } else {
792     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
793     rlen = mat->m;
794   }
795 
796   /* load up the local row counts */
797   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
798   for (i=0; i<mat->m; i++) {
799     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
800   }
801 
802   /* store the row lengths to the file */
803   if (rank == 0) {
804     MPI_Status status;
805     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
806     for (i=1; i<size; i++) {
807       rlen = range[i+1] - range[i];
808       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
809       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
810     }
811   } else {
812     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
813   }
814   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
815 
816   /* load up the local column indices */
817   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
818   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
819   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
820   cnt  = 0;
821   for (i=0; i<mat->m; i++) {
822     for (j=B->i[i]; j<B->i[i+1]; j++) {
823       if ( (col = garray[B->j[j]]) > cstart) break;
824       column_indices[cnt++] = col;
825     }
826     for (k=A->i[i]; k<A->i[i+1]; k++) {
827       column_indices[cnt++] = A->j[k] + cstart;
828     }
829     for (; j<B->i[i+1]; j++) {
830       column_indices[cnt++] = garray[B->j[j]];
831     }
832   }
833   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
834 
835   /* store the column indices to the file */
836   if (rank == 0) {
837     MPI_Status status;
838     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
839     for (i=1; i<size; i++) {
840       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
841       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
842       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
843       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
844     }
845   } else {
846     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
847     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
848   }
849   ierr = PetscFree(column_indices);CHKERRQ(ierr);
850 
851   /* load up the local column values */
852   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
853   cnt  = 0;
854   for (i=0; i<mat->m; i++) {
855     for (j=B->i[i]; j<B->i[i+1]; j++) {
856       if ( garray[B->j[j]] > cstart) break;
857       column_values[cnt++] = B->a[j];
858     }
859     for (k=A->i[i]; k<A->i[i+1]; k++) {
860       column_values[cnt++] = A->a[k];
861     }
862     for (; j<B->i[i+1]; j++) {
863       column_values[cnt++] = B->a[j];
864     }
865   }
866   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
867 
868   /* store the column values to the file */
869   if (rank == 0) {
870     MPI_Status status;
871     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
872     for (i=1; i<size; i++) {
873       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
874       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
875       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
876       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
877     }
878   } else {
879     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
880     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
881   }
882   ierr = PetscFree(column_values);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
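/*
   Layout of the binary file written above (all data funneled to process 0):

     int    header[4]  = { MAT_FILE_COOKIE, M, N, global nz }
     int    row_lengths[M]
     int    column_indices[nz]   (global indices, row by row)
     scalar values[nz]
*/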
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
888 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
889 {
890   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
891   int               ierr,rank = aij->rank,size = aij->size;
892   PetscTruth        isdraw,isascii,flg,isbinary;
893   PetscViewer       sviewer;
894   PetscViewerFormat format;
895 
896   PetscFunctionBegin;
897   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
898   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
899   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
900   if (isascii) {
901     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
902     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
903       MatInfo info;
904       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
905       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
906       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
907       if (flg) {
908         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
909 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
910       } else {
911         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
912 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
913       }
914       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
915       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
916       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
917       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
918       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
919       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
920       PetscFunctionReturn(0);
921     } else if (format == PETSC_VIEWER_ASCII_INFO) {
922       PetscFunctionReturn(0);
923     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
924 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
925       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
926 #endif
927 #if defined(PETSC_HAVE_MUMPS) && !defined(PETSC_USE_SINGLE)
928       ierr = MatFactorInfo_MUMPS(mat,viewer);CHKERRQ(ierr);
929 #endif
930       PetscFunctionReturn(0);
931     }
932   } else if (isbinary) {
933     if (size == 1) {
934       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
935       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
936     } else {
937       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
938     }
939     PetscFunctionReturn(0);
940   } else if (isdraw) {
941     PetscDraw  draw;
942     PetscTruth isnull;
943     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
944     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
945   }
946 
947   if (size == 1) {
948     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
949     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
950   } else {
951     /* assemble the entire matrix onto first processor. */
952     Mat         A;
953     Mat_SeqAIJ *Aloc;
954     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
955     PetscScalar *a;
956 
957     if (!rank) {
958       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
959     } else {
960       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
961     }
962     PetscLogObjectParent(mat,A);
963 
964     /* copy over the A part */
965     Aloc = (Mat_SeqAIJ*)aij->A->data;
966     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
967     row = aij->rstart;
968     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
969     for (i=0; i<m; i++) {
970       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
971       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
972     }
973     aj = Aloc->j;
974     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
975 
976     /* copy over the B part */
977     Aloc = (Mat_SeqAIJ*)aij->B->data;
978     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
979     row  = aij->rstart;
980     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
981     ct   = cols;
982     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
983     for (i=0; i<m; i++) {
984       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
985       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
986     }
987     ierr = PetscFree(ct);CHKERRQ(ierr);
988     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
989     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
990     /*
991        Everyone has to call to draw the matrix since the graphics waits are
992        synchronized across all processors that share the PetscDraw object
993     */
994     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
995     if (!rank) {
996       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
997       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
998     }
999     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1000     ierr = MatDestroy(A);CHKERRQ(ierr);
1001   }
1002   PetscFunctionReturn(0);
1003 }
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatView_MPIAIJ"
1007 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1008 {
1009   int        ierr;
1010   PetscTruth isascii,isdraw,issocket,isbinary;
1011 
1012   PetscFunctionBegin;
1013   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1014   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1015   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1016   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1017   if (isascii || isdraw || isbinary || issocket) {
1018     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1019   } else {
1020     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1021   }
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatRelax_MPIAIJ"
1029 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1030 {
1031   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1032   int          ierr;
1033   Vec          bb1;
1034   PetscScalar  mone=-1.0;
1035 
1036   PetscFunctionBegin;
1037   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1038 
1039   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1040 
1041   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1042     if (flag & SOR_ZERO_INITIAL_GUESS) {
1043       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1044       its--;
1045     }
1046 
1047     while (its--) {
1048       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1049       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1050 
1051       /* update rhs: bb1 = bb - B*x */
1052       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1053       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1054 
1055       /* local sweep */
1056       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1057       CHKERRQ(ierr);
1058     }
1059   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1060     if (flag & SOR_ZERO_INITIAL_GUESS) {
1061       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1062       its--;
1063     }
1064     while (its--) {
1065       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1066       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1067 
1068       /* update rhs: bb1 = bb - B*x */
1069       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1070       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1071 
1072       /* local sweep */
1073       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1074       CHKERRQ(ierr);
1075     }
1076   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1077     if (flag & SOR_ZERO_INITIAL_GUESS) {
1078       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1079       its--;
1080     }
1081     while (its--) {
1082       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1083       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1084 
1085       /* update rhs: bb1 = bb - B*x */
1086       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1087       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1088 
1089       /* local sweep */
1090       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1091       CHKERRQ(ierr);
1092     }
1093   } else {
1094     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1095   }
1096 
1097   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
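/*
   Each SOR_LOCAL_* iteration above is a block-Jacobi step with local SOR: the
   off-process coupling is frozen at the current iterate,

     bb1 = bb - A_off * x_ghost
     x   = SOR(A_diag, bb1, omega, lits)

   and this is repeated its times.
*/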
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1103 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1104 {
1105   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1106   Mat        A = mat->A,B = mat->B;
1107   int        ierr;
1108   PetscReal  isend[5],irecv[5];
1109 
1110   PetscFunctionBegin;
1111   info->block_size     = 1.0;
1112   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1113   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1114   isend[3] = info->memory;  isend[4] = info->mallocs;
1115   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1116   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1117   isend[3] += info->memory;  isend[4] += info->mallocs;
1118   if (flag == MAT_LOCAL) {
1119     info->nz_used      = isend[0];
1120     info->nz_allocated = isend[1];
1121     info->nz_unneeded  = isend[2];
1122     info->memory       = isend[3];
1123     info->mallocs      = isend[4];
1124   } else if (flag == MAT_GLOBAL_MAX) {
1125     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1126     info->nz_used      = irecv[0];
1127     info->nz_allocated = irecv[1];
1128     info->nz_unneeded  = irecv[2];
1129     info->memory       = irecv[3];
1130     info->mallocs      = irecv[4];
1131   } else if (flag == MAT_GLOBAL_SUM) {
1132     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1133     info->nz_used      = irecv[0];
1134     info->nz_allocated = irecv[1];
1135     info->nz_unneeded  = irecv[2];
1136     info->memory       = irecv[3];
1137     info->mallocs      = irecv[4];
1138   }
1139   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1140   info->fill_ratio_needed = 0;
1141   info->factor_mallocs    = 0;
1142   info->rows_global       = (double)matin->M;
1143   info->columns_global    = (double)matin->N;
1144   info->rows_local        = (double)matin->m;
1145   info->columns_local     = (double)matin->N;
1146 
1147   PetscFunctionReturn(0);
1148 }
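/*
   Usage sketch (hedged): a caller typically queries the assembled matrix with

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);

   after which info.nz_used, info.nz_allocated, info.mallocs, etc. hold values summed
   over all processes (or maximized, with MAT_GLOBAL_MAX).
*/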
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatSetOption_MPIAIJ"
1152 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1153 {
1154   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1155   int        ierr;
1156 
1157   PetscFunctionBegin;
1158   switch (op) {
1159   case MAT_NO_NEW_NONZERO_LOCATIONS:
1160   case MAT_YES_NEW_NONZERO_LOCATIONS:
1161   case MAT_COLUMNS_UNSORTED:
1162   case MAT_COLUMNS_SORTED:
1163   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1164   case MAT_KEEP_ZEROED_ROWS:
1165   case MAT_NEW_NONZERO_LOCATION_ERR:
1166   case MAT_USE_INODES:
1167   case MAT_DO_NOT_USE_INODES:
1168   case MAT_IGNORE_ZERO_ENTRIES:
1169     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1170     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1171     break;
1172   case MAT_ROW_ORIENTED:
1173     a->roworiented = PETSC_TRUE;
1174     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1175     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1176     break;
1177   case MAT_ROWS_SORTED:
1178   case MAT_ROWS_UNSORTED:
1179   case MAT_YES_NEW_DIAGONALS:
1180     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1181     break;
1182   case MAT_COLUMN_ORIENTED:
1183     a->roworiented = PETSC_FALSE;
1184     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1185     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1186     break;
1187   case MAT_IGNORE_OFF_PROC_ENTRIES:
1188     a->donotstash = PETSC_TRUE;
1189     break;
1190   case MAT_NO_NEW_DIAGONALS:
1191     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1192   default:
1193     SETERRQ(PETSC_ERR_SUP,"unknown option");
1194   }
1195   PetscFunctionReturn(0);
1196 }
1197 
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatGetRow_MPIAIJ"
1200 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1201 {
1202   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1203   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1204   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1205   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1206   int          *cmap,*idx_p;
1207 
1208   PetscFunctionBegin;
1209   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1210   mat->getrowactive = PETSC_TRUE;
1211 
1212   if (!mat->rowvalues && (idx || v)) {
1213     /*
1214         allocate enough space to hold information from the longest row.
1215     */
1216     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1217     int     max = 1,tmp;
1218     for (i=0; i<matin->m; i++) {
1219       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1220       if (max < tmp) { max = tmp; }
1221     }
1222     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1223     mat->rowindices = (int*)(mat->rowvalues + max);
1224   }
1225 
1226   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1227   lrow = row - rstart;
1228 
1229   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1230   if (!v)   {pvA = 0; pvB = 0;}
1231   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1232   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1233   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1234   nztot = nzA + nzB;
1235 
1236   cmap  = mat->garray;
1237   if (v  || idx) {
1238     if (nztot) {
1239       /* Sort by increasing column numbers, assuming A and B already sorted */
1240       int imark = -1;
1241       if (v) {
1242         *v = v_p = mat->rowvalues;
1243         for (i=0; i<nzB; i++) {
1244           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1245           else break;
1246         }
1247         imark = i;
1248         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1249         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1250       }
1251       if (idx) {
1252         *idx = idx_p = mat->rowindices;
1253         if (imark > -1) {
1254           for (i=0; i<imark; i++) {
1255             idx_p[i] = cmap[cworkB[i]];
1256           }
1257         } else {
1258           for (i=0; i<nzB; i++) {
1259             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1260             else break;
1261           }
1262           imark = i;
1263         }
1264         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1265         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1266       }
1267     } else {
1268       if (idx) *idx = 0;
1269       if (v)   *v   = 0;
1270     }
1271   }
1272   *nz = nztot;
1273   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1274   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
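/*
   Usage sketch (hedged example): only locally owned rows may be requested, and each
   MatGetRow() must be matched by a MatRestoreRow() before the next row is fetched.

     int         ncols,*cols;
     PetscScalar *vals;
     ierr = MatGetRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
*/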
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1280 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1281 {
1282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1283 
1284   PetscFunctionBegin;
1285   if (aij->getrowactive == PETSC_FALSE) {
1286     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1287   }
1288   aij->getrowactive = PETSC_FALSE;
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatNorm_MPIAIJ"
1294 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1295 {
1296   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1297   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1298   int          ierr,i,j,cstart = aij->cstart;
1299   PetscReal    sum = 0.0;
1300   PetscScalar  *v;
1301 
1302   PetscFunctionBegin;
1303   if (aij->size == 1) {
1304     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1305   } else {
1306     if (type == NORM_FROBENIUS) {
1307       v = amat->a;
1308       for (i=0; i<amat->nz; i++) {
1309 #if defined(PETSC_USE_COMPLEX)
1310         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1311 #else
1312         sum += (*v)*(*v); v++;
1313 #endif
1314       }
1315       v = bmat->a;
1316       for (i=0; i<bmat->nz; i++) {
1317 #if defined(PETSC_USE_COMPLEX)
1318         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1319 #else
1320         sum += (*v)*(*v); v++;
1321 #endif
1322       }
1323       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1324       *norm = sqrt(*norm);
1325     } else if (type == NORM_1) { /* max column norm */
1326       PetscReal *tmp,*tmp2;
1327       int    *jj,*garray = aij->garray;
1328       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1329       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1330       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1331       *norm = 0.0;
1332       v = amat->a; jj = amat->j;
1333       for (j=0; j<amat->nz; j++) {
1334         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1335       }
1336       v = bmat->a; jj = bmat->j;
1337       for (j=0; j<bmat->nz; j++) {
1338         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1339       }
1340       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1341       for (j=0; j<mat->N; j++) {
1342         if (tmp2[j] > *norm) *norm = tmp2[j];
1343       }
1344       ierr = PetscFree(tmp);CHKERRQ(ierr);
1345       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1346     } else if (type == NORM_INFINITY) { /* max row norm */
1347       PetscReal ntemp = 0.0;
1348       for (j=0; j<aij->A->m; j++) {
1349         v = amat->a + amat->i[j];
1350         sum = 0.0;
1351         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1352           sum += PetscAbsScalar(*v); v++;
1353         }
1354         v = bmat->a + bmat->i[j];
1355         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1356           sum += PetscAbsScalar(*v); v++;
1357         }
1358         if (sum > ntemp) ntemp = sum;
1359       }
1360       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1361     } else {
1362       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1363     }
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
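/*
   Usage sketch (illustrative, not part of the library source): the three norm
   types handled above are requested through the public MatNorm() interface;
   NORM_2 is the case rejected by the SETERRQ above.

     Mat       A;                       an assembled MPIAIJ matrix
     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);    sqrt of the global sum of |a_ij|^2
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);            maximum column sum of |a_ij|
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);     maximum row sum of |a_ij|
*/
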
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatTranspose_MPIAIJ"
1370 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1371 {
1372   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1373   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1374   int          ierr;
1375   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1376   Mat          B;
1377   PetscScalar  *array;
1378 
1379   PetscFunctionBegin;
1380   if (!matout && M != N) {
1381     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1382   }
1383 
1384   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1385 
1386   /* copy over the A part */
1387   Aloc = (Mat_SeqAIJ*)a->A->data;
1388   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1389   row = a->rstart;
1390   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1391   for (i=0; i<m; i++) {
1392     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1393     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1394   }
1395   aj = Aloc->j;
1396   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1397 
1398   /* copy over the B part */
1399   Aloc = (Mat_SeqAIJ*)a->B->data;
1400   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1401   row  = a->rstart;
1402   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1403   ct   = cols;
1404   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1405   for (i=0; i<m; i++) {
1406     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1407     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1408   }
1409   ierr = PetscFree(ct);CHKERRQ(ierr);
1410   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1411   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1412   if (matout) {
1413     *matout = B;
1414   } else {
1415     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1416   }
1417   PetscFunctionReturn(0);
1418 }
1419 
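/*
   Usage sketch (illustrative): MatTranspose() dispatches to the routine above for
   MPIAIJ matrices.  Passing PETSC_NULL for the output requests the in-place
   variant, which the check above only permits for square matrices.

     Mat A,At;
     ierr = MatTranspose(A,&At);CHKERRQ(ierr);          place the transpose in a new matrix At
     ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);   replace A by its transpose (square only)
*/
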
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1422 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1423 {
1424   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1425   Mat        a = aij->A,b = aij->B;
1426   int        ierr,s1,s2,s3;
1427 
1428   PetscFunctionBegin;
1429   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1430   if (rr) {
1431     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1432     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1433     /* Overlap communication with computation. */
1434     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1435   }
1436   if (ll) {
1437     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1438     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1439     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1440   }
1441   /* scale  the diagonal block */
1442   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1443 
1444   if (rr) {
1445     /* Do a scatter end and then right scale the off-diagonal block */
1446     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1447     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1448   }
1449 
1450   PetscFunctionReturn(0);
1451 }
1452 
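/*
   Usage sketch (illustrative): the routine above implements mat = diag(ll)*mat*diag(rr)
   for the public MatDiagonalScale() call; either vector may be PETSC_NULL to skip
   that side, and the local sizes must match the matrix's local row and column
   sizes, as checked above.

     Mat A;
     Vec l,r;                              vectors with the matrix's local row/column layout
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,PETSC_NULL,r);CHKERRQ(ierr);    right scaling only
*/
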
1453 
1454 #undef __FUNCT__
1455 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1456 int MatPrintHelp_MPIAIJ(Mat A)
1457 {
1458   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1459   int        ierr;
1460 
1461   PetscFunctionBegin;
1462   if (!a->rank) {
1463     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1464   }
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1470 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1471 {
1472   PetscFunctionBegin;
1473   *bs = 1;
1474   PetscFunctionReturn(0);
1475 }
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1478 int MatSetUnfactored_MPIAIJ(Mat A)
1479 {
1480   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1481   int        ierr;
1482 
1483   PetscFunctionBegin;
1484   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatEqual_MPIAIJ"
1490 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1491 {
1492   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1493   Mat        a,b,c,d;
1494   PetscTruth flg;
1495   int        ierr;
1496 
1497   PetscFunctionBegin;
1498   a = matA->A; b = matA->B;
1499   c = matB->A; d = matB->B;
1500 
1501   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1502   if (flg == PETSC_TRUE) {
1503     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1504   }
1505   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatCopy_MPIAIJ"
1511 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1512 {
1513   int        ierr;
1514   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1515   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1516 
1517   PetscFunctionBegin;
1518   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1519   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1520     /* Because of the column compression in the off-processor part of the matrix a->B,
1521        the number of columns in a->B and b->B may differ, hence we cannot call
1522        MatCopy() directly on the two parts. If need be, a more efficient copy than
1523        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1524        then copying the submatrices */
1525     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1526   } else {
1527     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1528     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1529   }
1530   PetscFunctionReturn(0);
1531 }
1532 
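/*
   Usage sketch (illustrative): the fast block-wise path above is taken only when
   the caller asserts SAME_NONZERO_PATTERN and both matrices use this copy routine;
   any other combination falls back to the generic MatCopy_Basic().

     Mat A,B;                              two assembled MPIAIJ matrices with the same layout
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatCopy(A,B,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);   generic path
*/
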
1533 #undef __FUNCT__
1534 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1535 int MatSetUpPreallocation_MPIAIJ(Mat A)
1536 {
1537   int        ierr;
1538 
1539   PetscFunctionBegin;
1540   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1545 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1546 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1547 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
1548 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1549 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1550 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1551 #endif
1552 
1553 #include "petscblaslapack.h"
1554 extern int MatAXPY_SeqAIJ(const PetscScalar[],Mat,Mat,MatStructure);
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatAXPY_MPIAIJ"
1557 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1558 {
1559   int        ierr,one=1,i;
1560   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1561   Mat_SeqAIJ *x,*y;
1562 
1563   PetscFunctionBegin;
1564   if (str == SAME_NONZERO_PATTERN) {
1565     x = (Mat_SeqAIJ *)xx->A->data;
1566     y = (Mat_SeqAIJ *)yy->A->data;
1567     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1568     x = (Mat_SeqAIJ *)xx->B->data;
1569     y = (Mat_SeqAIJ *)yy->B->data;
1570     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1571   } else if (str == SUBSET_NONZERO_PATTERN) {
1572     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1573 
1574     x = (Mat_SeqAIJ *)xx->B->data;
1575     y = (Mat_SeqAIJ *)yy->B->data;
1576     if (y->xtoy && y->XtoY != xx->B) {
1577       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1578       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1579     }
1580     if (!y->xtoy) { /* get xtoy */
1581       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1582       y->XtoY = xx->B;
1583     }
1584     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1585   } else {
1586     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590 
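/*
   Usage sketch (illustrative): MatAXPY() computes Y = a*X + Y.  With
   SAME_NONZERO_PATTERN the routine above reduces to two BLAS axpy calls on the
   diagonal and off-diagonal blocks; with SUBSET_NONZERO_PATTERN an index map
   xtoy from X's entries into Y's entries is built once and reused.

     Mat         X,Y;
     PetscScalar alpha = 2.0;
     ierr = MatAXPY(&alpha,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
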
1591 /* -------------------------------------------------------------------*/
1592 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1593        MatGetRow_MPIAIJ,
1594        MatRestoreRow_MPIAIJ,
1595        MatMult_MPIAIJ,
1596        MatMultAdd_MPIAIJ,
1597        MatMultTranspose_MPIAIJ,
1598        MatMultTransposeAdd_MPIAIJ,
1599        0,
1600        0,
1601        0,
1602        0,
1603        0,
1604        0,
1605        MatRelax_MPIAIJ,
1606        MatTranspose_MPIAIJ,
1607        MatGetInfo_MPIAIJ,
1608        MatEqual_MPIAIJ,
1609        MatGetDiagonal_MPIAIJ,
1610        MatDiagonalScale_MPIAIJ,
1611        MatNorm_MPIAIJ,
1612        MatAssemblyBegin_MPIAIJ,
1613        MatAssemblyEnd_MPIAIJ,
1614        0,
1615        MatSetOption_MPIAIJ,
1616        MatZeroEntries_MPIAIJ,
1617        MatZeroRows_MPIAIJ,
1618 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1619        MatLUFactorSymbolic_MPIAIJ_TFS,
1620 #else
1621        0,
1622 #endif
1623        0,
1624        0,
1625        0,
1626        MatSetUpPreallocation_MPIAIJ,
1627        0,
1628        0,
1629        0,
1630        0,
1631        MatDuplicate_MPIAIJ,
1632        0,
1633        0,
1634        0,
1635        0,
1636        MatAXPY_MPIAIJ,
1637        MatGetSubMatrices_MPIAIJ,
1638        MatIncreaseOverlap_MPIAIJ,
1639        MatGetValues_MPIAIJ,
1640        MatCopy_MPIAIJ,
1641        MatPrintHelp_MPIAIJ,
1642        MatScale_MPIAIJ,
1643        0,
1644        0,
1645        0,
1646        MatGetBlockSize_MPIAIJ,
1647        0,
1648        0,
1649        0,
1650        0,
1651        MatFDColoringCreate_MPIAIJ,
1652        0,
1653        MatSetUnfactored_MPIAIJ,
1654        0,
1655        0,
1656        MatGetSubMatrix_MPIAIJ,
1657        MatDestroy_MPIAIJ,
1658        MatView_MPIAIJ,
1659        MatGetPetscMaps_Petsc,
1660        0,
1661        0,
1662        0,
1663        0,
1664        0,
1665        0,
1666        0,
1667        0,
1668        MatSetColoring_MPIAIJ,
1669        MatSetValuesAdic_MPIAIJ,
1670        MatSetValuesAdifor_MPIAIJ
1671 };
1672 
1673 /* ----------------------------------------------------------------------------------------*/
1674 
1675 EXTERN_C_BEGIN
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1678 int MatStoreValues_MPIAIJ(Mat mat)
1679 {
1680   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1681   int        ierr;
1682 
1683   PetscFunctionBegin;
1684   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1685   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1686   PetscFunctionReturn(0);
1687 }
1688 EXTERN_C_END
1689 
1690 EXTERN_C_BEGIN
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1693 int MatRetrieveValues_MPIAIJ(Mat mat)
1694 {
1695   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1696   int        ierr;
1697 
1698   PetscFunctionBegin;
1699   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1700   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 EXTERN_C_END
1704 
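/*
   Usage sketch (illustrative): the two routines above simply forward to the
   sequential diagonal and off-diagonal blocks, so the numerical values of an
   assembled matrix whose nonzero pattern does not change can be saved and later
   restored around a computation that overwrites them.

     ierr = MatStoreValues(J);CHKERRQ(ierr);        save the current values of J
       ... computation that modifies the values of J ...
     ierr = MatRetrieveValues(J);CHKERRQ(ierr);     restore the saved values
*/
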
1705 #include "petscpc.h"
1706 EXTERN_C_BEGIN
1707 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1708 EXTERN int MatDiagonalScaleLocal_MPIAIJ(Mat,Vec);
1709 EXTERN_C_END
1710 
1711 EXTERN_C_BEGIN
1712 #undef __FUNCT__
1713 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1714 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1715 {
1716   Mat_MPIAIJ   *b;
1717   int          ierr,i;
1718 
1719   PetscFunctionBegin;
1720   B->preallocated = PETSC_TRUE;
1721   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1722   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1723   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1724   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1725   if (d_nnz) {
1726     for (i=0; i<B->m; i++) {
1727       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1728     }
1729   }
1730   if (o_nnz) {
1731     for (i=0; i<B->m; i++) {
1732       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1733     }
1734   }
1735   b = (Mat_MPIAIJ*)B->data;
1736 
1737   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1738   PetscLogObjectParent(B,b->A);
1739   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1740   PetscLogObjectParent(B,b->B);
1741 
1742   PetscFunctionReturn(0);
1743 }
1744 EXTERN_C_END
1745 
1746 EXTERN_C_BEGIN
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatCreate_MPIAIJ"
1749 int MatCreate_MPIAIJ(Mat B)
1750 {
1751   Mat_MPIAIJ *b;
1752   int        ierr,i,size;
1753 
1754   PetscFunctionBegin;
1755   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1756 
1757   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1758   B->data         = (void*)b;
1759   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1760   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1761   B->factor       = 0;
1762   B->assembled    = PETSC_FALSE;
1763   B->mapping      = 0;
1764 
1765   B->insertmode      = NOT_SET_VALUES;
1766   b->size            = size;
1767   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1768 
1769   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1770   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1771 
1772   /* the information in the maps duplicates the information computed below, eventually
1773      we should remove the duplicate information that is not contained in the maps */
1774   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1775   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1776 
1777   /* build local table of row and column ownerships */
1778   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1779   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1780   b->cowners = b->rowners + b->size + 2;
1781   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1782   b->rowners[0] = 0;
1783   for (i=2; i<=b->size; i++) {
1784     b->rowners[i] += b->rowners[i-1];
1785   }
1786   b->rstart = b->rowners[b->rank];
1787   b->rend   = b->rowners[b->rank+1];
1788   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1789   b->cowners[0] = 0;
1790   for (i=2; i<=b->size; i++) {
1791     b->cowners[i] += b->cowners[i-1];
1792   }
1793   b->cstart = b->cowners[b->rank];
1794   b->cend   = b->cowners[b->rank+1];
1795 
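  /*
     Worked example (illustrative): with three processes contributing local row
     counts m = 3, 3, 2, the MPI_Allgather() above fills rowners[1..3] = {3,3,2}
     and the prefix-sum loop turns this into rowners = {0,3,6,8}, so process 1
     owns the global rows [rstart,rend) = [3,6).  The cowners table for the
     columns is built in exactly the same way.
  */
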
1796   /* build cache for off array entries formed */
1797   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1798   b->donotstash  = PETSC_FALSE;
1799   b->colmap      = 0;
1800   b->garray      = 0;
1801   b->roworiented = PETSC_TRUE;
1802 
1803   /* stuff used for matrix vector multiply */
1804   b->lvec      = PETSC_NULL;
1805   b->Mvctx     = PETSC_NULL;
1806 
1807   /* stuff for MatGetRow() */
1808   b->rowindices   = 0;
1809   b->rowvalues    = 0;
1810   b->getrowactive = PETSC_FALSE;
1811 
1812   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1813                                      "MatStoreValues_MPIAIJ",
1814                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1815   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1816                                      "MatRetrieveValues_MPIAIJ",
1817                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1819 				     "MatGetDiagonalBlock_MPIAIJ",
1820                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1822 				     "MatIsSymmetric_MPIAIJ",
1823 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1824   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1825 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1826 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1828 				     "MatDiagonalScaleLocal_MPIAIJ",
1829 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1830   PetscFunctionReturn(0);
1831 }
1832 EXTERN_C_END
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1836 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1837 {
1838   Mat        mat;
1839   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1840   int        ierr;
1841 
1842   PetscFunctionBegin;
1843   *newmat       = 0;
1844   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1845   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1846   a    = (Mat_MPIAIJ*)mat->data;
1847   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1848   mat->factor       = matin->factor;
1849   mat->assembled    = PETSC_TRUE;
1850   mat->insertmode   = NOT_SET_VALUES;
1851   mat->preallocated = PETSC_TRUE;
1852 
1853   a->rstart       = oldmat->rstart;
1854   a->rend         = oldmat->rend;
1855   a->cstart       = oldmat->cstart;
1856   a->cend         = oldmat->cend;
1857   a->size         = oldmat->size;
1858   a->rank         = oldmat->rank;
1859   a->donotstash   = oldmat->donotstash;
1860   a->roworiented  = oldmat->roworiented;
1861   a->rowindices   = 0;
1862   a->rowvalues    = 0;
1863   a->getrowactive = PETSC_FALSE;
1864 
1865   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1866   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1867   if (oldmat->colmap) {
1868 #if defined (PETSC_USE_CTABLE)
1869     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1870 #else
1871     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1872     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1873     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1874 #endif
1875   } else a->colmap = 0;
1876   if (oldmat->garray) {
1877     int len;
1878     len  = oldmat->B->n;
1879     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1880     PetscLogObjectMemory(mat,len*sizeof(int));
1881     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1882   } else a->garray = 0;
1883 
1884   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1885   PetscLogObjectParent(mat,a->lvec);
1886   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1887   PetscLogObjectParent(mat,a->Mvctx);
1888   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1889   PetscLogObjectParent(mat,a->A);
1890   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1891   PetscLogObjectParent(mat,a->B);
1892   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1893   *newmat = mat;
1894   PetscFunctionReturn(0);
1895 }
1896 
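/*
   Usage sketch (illustrative): MatDuplicate() dispatches to the routine above for
   MPIAIJ matrices; the MatDuplicateOption (standard option names assumed here)
   selects whether the numerical values are copied along with the structure.

     Mat A,B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);           copy structure and values
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);    copy structure only
*/
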
1897 #include "petscsys.h"
1898 
1899 EXTERN_C_BEGIN
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatLoad_MPIAIJ"
1902 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1903 {
1904   Mat          A;
1905   PetscScalar  *vals,*svals;
1906   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1907   MPI_Status   status;
1908   int          i,nz,ierr,j,rstart,rend,fd;
1909   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1910   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1911   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1912 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_MUMPS)
1913   PetscTruth   flag;
1914 #endif
1915 
1916   PetscFunctionBegin;
1917   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1918   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1919   if (!rank) {
1920     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1921     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1922     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1923     if (header[3] < 0) {
1924       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1925     }
1926   }
1927 
1928   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1929   M = header[1]; N = header[2];
1930   /* determine ownership of all rows */
1931   m = M/size + ((M % size) > rank);
1932   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1933   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1934   rowners[0] = 0;
1935   for (i=2; i<=size; i++) {
1936     rowners[i] += rowners[i-1];
1937   }
1938   rstart = rowners[rank];
1939   rend   = rowners[rank+1];
1940 
1941   /* distribute row lengths to all processors */
1942   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1943   offlens = ourlens + (rend-rstart);
1944   if (!rank) {
1945     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1946     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1947     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1948     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1949     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1950     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1951   } else {
1952     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1953   }
1954 
1955   if (!rank) {
1956     /* calculate the number of nonzeros on each processor */
1957     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1958     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1959     for (i=0; i<size; i++) {
1960       for (j=rowners[i]; j< rowners[i+1]; j++) {
1961         procsnz[i] += rowlengths[j];
1962       }
1963     }
1964     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1965 
1966     /* determine max buffer needed and allocate it */
1967     maxnz = 0;
1968     for (i=0; i<size; i++) {
1969       maxnz = PetscMax(maxnz,procsnz[i]);
1970     }
1971     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1972 
1973     /* read in my part of the matrix column indices  */
1974     nz   = procsnz[0];
1975     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1976     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1977 
1978     /* read in everyone else's rows and ship them off */
1979     for (i=1; i<size; i++) {
1980       nz   = procsnz[i];
1981       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1982       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1983     }
1984     ierr = PetscFree(cols);CHKERRQ(ierr);
1985   } else {
1986     /* determine buffer space needed for message */
1987     nz = 0;
1988     for (i=0; i<m; i++) {
1989       nz += ourlens[i];
1990     }
1991     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1992 
1993     /* receive message of column indices*/
1994     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1995     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1996     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1997   }
1998 
1999   /* determine column ownership if matrix is not square */
2000   if (N != M) {
2001     n      = N/size + ((N % size) > rank);
2002     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2003     cstart = cend - n;
2004   } else {
2005     cstart = rstart;
2006     cend   = rend;
2007     n      = cend - cstart;
2008   }
2009 
2010   /* loop over local rows, determining number of off diagonal entries */
2011   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
2012   jj = 0;
2013   for (i=0; i<m; i++) {
2014     for (j=0; j<ourlens[i]; j++) {
2015       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
2016       jj++;
2017     }
2018   }
2019 
2020   /* create our matrix */
2021   for (i=0; i<m; i++) {
2022     ourlens[i] -= offlens[i];
2023   }
2024   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2025   ierr = MatSetType(A,type);CHKERRQ(ierr);
2026   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2027 
2028   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2029   for (i=0; i<m; i++) {
2030     ourlens[i] += offlens[i];
2031   }
2032 
2033   if (!rank) {
2034     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2035 
2036     /* read in my part of the matrix numerical values  */
2037     nz   = procsnz[0];
2038     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2039 
2040     /* insert into matrix */
2041     jj      = rstart;
2042     smycols = mycols;
2043     svals   = vals;
2044     for (i=0; i<m; i++) {
2045       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2046       smycols += ourlens[i];
2047       svals   += ourlens[i];
2048       jj++;
2049     }
2050 
2051     /* read in other processors and ship out */
2052     for (i=1; i<size; i++) {
2053       nz   = procsnz[i];
2054       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2055       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2056     }
2057     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2058   } else {
2059     /* receive numeric values */
2060     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2061 
2062     /* receive message of values*/
2063     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2064     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2065     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2066 
2067     /* insert into matrix */
2068     jj      = rstart;
2069     smycols = mycols;
2070     svals   = vals;
2071     for (i=0; i<m; i++) {
2072       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2073       smycols += ourlens[i];
2074       svals   += ourlens[i];
2075       jj++;
2076     }
2077   }
2078   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2079   ierr = PetscFree(vals);CHKERRQ(ierr);
2080   ierr = PetscFree(mycols);CHKERRQ(ierr);
2081   ierr = PetscFree(rowners);CHKERRQ(ierr);
2082 
2083   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2084   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2085 #if defined(PETSC_HAVE_SUPERLUDIST)
2086   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
2087   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
2088 #endif
2089 #if defined(PETSC_HAVE_MUMPS)
2090   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_mumps",&flag);CHKERRQ(ierr);
2091   if (flag) { ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr); }
2092 #endif
2093   *newmat = A;
2094   PetscFunctionReturn(0);
2095 }
2096 EXTERN_C_END
2097 
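/*
   Usage sketch (illustrative): MatLoad() with a binary viewer drives the routine
   above; process 0 reads the header, row lengths, column indices and values and
   ships each process its share.  The read-only mode name PETSC_BINARY_RDONLY is
   an assumption here, by analogy with the PETSC_BINARY_CREATE mode used in
   MatFileSplit() later in this file.

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/
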
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2100 /*
2101     Not great since it makes two copies of the submatrix: first a SeqAIJ
2102   matrix locally, and then the end result by concatenating the local matrices.
2103   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2104 */
2105 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2106 {
2107   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2108   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2109   Mat          *local,M,Mreuse;
2110   PetscScalar  *vwork,*aa;
2111   MPI_Comm     comm = mat->comm;
2112   Mat_SeqAIJ   *aij;
2113 
2114 
2115   PetscFunctionBegin;
2116   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2117   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2118 
2119   if (call ==  MAT_REUSE_MATRIX) {
2120     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2121     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2122     local = &Mreuse;
2123     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2124   } else {
2125     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2126     Mreuse = *local;
2127     ierr   = PetscFree(local);CHKERRQ(ierr);
2128   }
2129 
2130   /*
2131       m - number of local rows
2132       n - number of columns (same on all processors)
2133       rstart - first row in new global matrix generated
2134   */
2135   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2136   if (call == MAT_INITIAL_MATRIX) {
2137     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2138     ii  = aij->i;
2139     jj  = aij->j;
2140 
2141     /*
2142         Determine the number of non-zeros in the diagonal and off-diagonal
2143         portions of the matrix in order to do correct preallocation
2144     */
2145 
2146     /* first get start and end of "diagonal" columns */
2147     if (csize == PETSC_DECIDE) {
2148       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2149       if (mglobal == n) { /* square matrix */
2150 	nlocal = m;
2151       } else {
2152         nlocal = n/size + ((n % size) > rank);
2153       }
2154     } else {
2155       nlocal = csize;
2156     }
2157     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2158     rstart = rend - nlocal;
2159     if (rank == size - 1 && rend != n) {
2160       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2161     }
2162 
2163     /* next, compute all the lengths */
2164     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2165     olens = dlens + m;
2166     for (i=0; i<m; i++) {
2167       jend = ii[i+1] - ii[i];
2168       olen = 0;
2169       dlen = 0;
2170       for (j=0; j<jend; j++) {
2171         if (*jj < rstart || *jj >= rend) olen++;
2172         else dlen++;
2173         jj++;
2174       }
2175       olens[i] = olen;
2176       dlens[i] = dlen;
2177     }
2178     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2179     ierr = PetscFree(dlens);CHKERRQ(ierr);
2180   } else {
2181     int ml,nl;
2182 
2183     M = *newmat;
2184     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2185     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2186     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2187     /*
2188          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2189        rather than the slower MatSetValues().
2190     */
2191     M->was_assembled = PETSC_TRUE;
2192     M->assembled     = PETSC_FALSE;
2193   }
2194   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2195   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2196   ii  = aij->i;
2197   jj  = aij->j;
2198   aa  = aij->a;
2199   for (i=0; i<m; i++) {
2200     row   = rstart + i;
2201     nz    = ii[i+1] - ii[i];
2202     cwork = jj;     jj += nz;
2203     vwork = aa;     aa += nz;
2204     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2205   }
2206 
2207   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2208   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2209   *newmat = M;
2210 
2211   /* save submatrix used in processor for next request */
2212   if (call ==  MAT_INITIAL_MATRIX) {
2213     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2214     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2215   }
2216 
2217   PetscFunctionReturn(0);
2218 }
2219 
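/*
   Usage sketch (illustrative): the routine above is reached through the public
   MatGetSubMatrix() interface, whose argument list mirrors the one here.
   MAT_INITIAL_MATRIX creates the submatrix (and caches the local piece under the
   composed name "SubMatrix"), while MAT_REUSE_MATRIX refills a submatrix obtained
   from an earlier call with the same index sets.

     IS  isrow,iscol;                      index sets selecting the rows/columns to keep
     Mat S;
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
       ... the values of A change, same nonzero pattern ...
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
*/
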
2220 #undef __FUNCT__
2221 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2222 /*@C
2223    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2224    (the default parallel PETSc format).  For good matrix assembly performance
2225    the user should preallocate the matrix storage by setting the parameters
2226    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2227    performance can be increased by more than a factor of 50.
2228 
2229    Collective on MPI_Comm
2230 
2231    Input Parameters:
2232 +  A - the matrix
2233 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2234            (same value is used for all local rows)
2235 .  d_nnz - array containing the number of nonzeros in the various rows of the
2236            DIAGONAL portion of the local submatrix (possibly different for each row)
2237            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2238            The size of this array is equal to the number of local rows, i.e 'm'.
2239            You must leave room for the diagonal entry even if it is zero.
2240 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2241            submatrix (same value is used for all local rows).
2242 -  o_nnz - array containing the number of nonzeros in the various rows of the
2243            OFF-DIAGONAL portion of the local submatrix (possibly different for
2244            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2245            structure. The size of this array is equal to the number
2246            of local rows, i.e 'm'.
2247 
2248    The AIJ format (also called the Yale sparse matrix format or
2249    compressed row storage), is fully compatible with standard Fortran 77
2250    storage.  That is, the stored row and column indices can begin at
2251    either one (as in Fortran) or zero.  See the users manual for details.
2252 
2253    The user MUST specify either the local or global matrix dimensions
2254    (possibly both).
2255 
2256    The parallel matrix is partitioned such that the first m0 rows belong to
2257    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2258    to process 2 etc., where m0,m1,m2,... are given by the input parameter 'm'.
2259 
2260    The DIAGONAL portion of the local submatrix of a processor can be defined
2261    as the submatrix which is obtained by extracting the part corresponding
2262    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2263    first row that belongs to the processor, and r2 is the last row belonging
2264    to this processor. This is a square mxm matrix. The remaining portion
2265    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2266 
2267    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2268 
2269    By default, this format uses inodes (identical nodes) when possible.
2270    We search for consecutive rows with the same nonzero structure, thereby
2271    reusing matrix information to achieve increased efficiency.
2272 
2273    Options Database Keys:
2274 +  -mat_aij_no_inode  - Do not use inodes
2275 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2276 -  -mat_aij_oneindex - Internally use indexing starting at 1
2277         rather than 0.  Note that when calling MatSetValues(),
2278         the user still MUST index entries starting at 0!
2279 
2280    Example usage:
2281 
2282    Consider the following 8x8 matrix with 34 non-zero values, that is
2283    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2284    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2285    as follows:
2286 
2287 .vb
2288             1  2  0  |  0  3  0  |  0  4
2289     Proc0   0  5  6  |  7  0  0  |  8  0
2290             9  0 10  | 11  0  0  | 12  0
2291     -------------------------------------
2292            13  0 14  | 15 16 17  |  0  0
2293     Proc1   0 18  0  | 19 20 21  |  0  0
2294             0  0  0  | 22 23  0  | 24  0
2295     -------------------------------------
2296     Proc2  25 26 27  |  0  0 28  | 29  0
2297            30  0  0  | 31 32 33  |  0 34
2298 .ve
2299 
2300    This can be represented as a collection of submatrices as:
2301 
2302 .vb
2303       A B C
2304       D E F
2305       G H I
2306 .ve
2307 
2308    Where the submatrices A,B,C are owned by proc0, D,E,F are
2309    owned by proc1, G,H,I are owned by proc2.
2310 
2311    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2312    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2313    The 'M','N' parameters are 8,8, and have the same values on all procs.
2314 
2315    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2316    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2317    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2318    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2319    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2320    matrix, and [DF] as another SeqAIJ matrix.
2321 
2322    When d_nz, o_nz parameters are specified, d_nz storage elements are
2323    allocated for every row of the local diagonal submatrix, and o_nz
2324    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2325    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2326    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2327    In this case, the values of d_nz,o_nz are:
2328 .vb
2329      proc0 : dnz = 2, o_nz = 2
2330      proc1 : dnz = 3, o_nz = 2
2331      proc2 : dnz = 1, o_nz = 4
2332 .ve
2333    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2334    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2335    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2336    34 values.
2337 
2338    When d_nnz, o_nnz parameters are specified, the storage is specified
2339    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2340    In the above case the values for d_nnz,o_nnz are:
2341 .vb
2342      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2343      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2344      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2345 .ve
2346    Here the space allocated is the sum of all the above values, i.e. 34, and
2347    hence the preallocation is perfect.
2348 
2349    Level: intermediate
2350 
2351 .keywords: matrix, aij, compressed row, sparse, parallel
2352 
2353 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2354 @*/
2355 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2356 {
2357   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2358 
2359   PetscFunctionBegin;
2360   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2361   if (f) {
2362     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2363   }
2364   PetscFunctionReturn(0);
2365 }
2366 
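/*
   Usage sketch (illustrative): preallocation for the 8x8 example in the manual
   page above, written as it would look on proc1 (3 local rows, 3 local columns;
   the other processes pass their own local sizes and arrays).  The d_nnz/o_nnz
   arrays give per-row counts for the diagonal block [E] and the off-diagonal
   block [DF] respectively.

     Mat A;
     int d_nnz[3] = {3,3,2},o_nnz[3] = {2,1,1};
     ierr = MatCreate(PETSC_COMM_WORLD,3,3,8,8,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/
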
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatCreateMPIAIJ"
2369 /*@C
2370    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2371    (the default parallel PETSc format).  For good matrix assembly performance
2372    the user should preallocate the matrix storage by setting the parameters
2373    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2374    performance can be increased by more than a factor of 50.
2375 
2376    Collective on MPI_Comm
2377 
2378    Input Parameters:
2379 +  comm - MPI communicator
2380 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2381            This value should be the same as the local size used in creating the
2382            y vector for the matrix-vector product y = Ax.
2383 .  n - This value should be the same as the local size used in creating the
2384        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2385        calculated if N is given) For square matrices n is almost always m.
2386 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2387 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2388 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2389            (same value is used for all local rows)
2390 .  d_nnz - array containing the number of nonzeros in the various rows of the
2391            DIAGONAL portion of the local submatrix (possibly different for each row)
2392            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2393            The size of this array is equal to the number of local rows, i.e 'm'.
2394            You must leave room for the diagonal entry even if it is zero.
2395 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2396            submatrix (same value is used for all local rows).
2397 -  o_nnz - array containing the number of nonzeros in the various rows of the
2398            OFF-DIAGONAL portion of the local submatrix (possibly different for
2399            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2400            structure. The size of this array is equal to the number
2401            of local rows, i.e 'm'.
2402 
2403    Output Parameter:
2404 .  A - the matrix
2405 
2406    Notes:
2407    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2408    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2409    storage requirements for this matrix.
2410 
2411    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2412    processor then it must be used on all processors that share the object for
2413    that argument.
2414 
2415    The AIJ format (also called the Yale sparse matrix format or
2416    compressed row storage), is fully compatible with standard Fortran 77
2417    storage.  That is, the stored row and column indices can begin at
2418    either one (as in Fortran) or zero.  See the users manual for details.
2419 
2420    The user MUST specify either the local or global matrix dimensions
2421    (possibly both).
2422 
2423    The parallel matrix is partitioned such that the first m0 rows belong to
2424    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2425    to process 2 etc., where m0,m1,m2,... are given by the input parameter 'm'.
2426 
2427    The DIAGONAL portion of the local submatrix of a processor can be defined
2428    as the submatrix which is obtained by extracting the part corresponding
2429    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2430    first row that belongs to the processor, and r2 is the last row belonging
2431    to this processor. This is a square mxm matrix. The remaining portion
2432    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2433 
2434    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2435 
2436    When calling this routine with a single process communicator, a matrix of
2437    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2438    type of communicator, use the construction mechanism:
2439      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2440 
2441    By default, this format uses inodes (identical nodes) when possible.
2442    We search for consecutive rows with the same nonzero structure, thereby
2443    reusing matrix information to achieve increased efficiency.
2444 
2445    Options Database Keys:
2446 +  -mat_aij_no_inode  - Do not use inodes
2447 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2448 -  -mat_aij_oneindex - Internally use indexing starting at 1
2449         rather than 0.  Note that when calling MatSetValues(),
2450         the user still MUST index entries starting at 0!
2451 
2452 
2453    Example usage:
2454 
2455    Consider the following 8x8 matrix with 34 non-zero values, that is
2456    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2457    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
2458    as follows:
2459 
2460 .vb
2461             1  2  0  |  0  3  0  |  0  4
2462     Proc0   0  5  6  |  7  0  0  |  8  0
2463             9  0 10  | 11  0  0  | 12  0
2464     -------------------------------------
2465            13  0 14  | 15 16 17  |  0  0
2466     Proc1   0 18  0  | 19 20 21  |  0  0
2467             0  0  0  | 22 23  0  | 24  0
2468     -------------------------------------
2469     Proc2  25 26 27  |  0  0 28  | 29  0
2470            30  0  0  | 31 32 33  |  0 34
2471 .ve
2472 
2473    This can be represented as a collection of submatrices as:
2474 
2475 .vb
2476       A B C
2477       D E F
2478       G H I
2479 .ve
2480 
2481    Where the submatrices A,B,C are owned by proc0, D,E,F are
2482    owned by proc1, G,H,I are owned by proc2.
2483 
2484    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2485    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2486    The 'M','N' parameters are 8,8, and have the same values on all procs.
2487 
2488    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2489    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2490    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2491    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2492    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2493    matrix, and [DF] as another SeqAIJ matrix.
2494 
2495    When d_nz, o_nz parameters are specified, d_nz storage elements are
2496    allocated for every row of the local diagonal submatrix, and o_nz
2497    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2498    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2499    local row in each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2500    In this case, the values of d_nz,o_nz are:
2501 .vb
2502      proc0 : dnz = 2, o_nz = 2
2503      proc1 : dnz = 3, o_nz = 2
2504      proc2 : dnz = 1, o_nz = 4
2505 .ve
2506    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2507    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2508    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2509    34 values.
2510 
2511    When d_nnz, o_nnz parameters are specified, the storage is specified
2512    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2513    In the above case the values for d_nnz,o_nnz are:
2514 .vb
2515      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2516      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2517      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2518 .ve
2519    Here the space allocated is the sum of all the above values, i.e. 34, and
2520    hence the preallocation is perfect.
2521 
2522    Level: intermediate
2523 
2524 .keywords: matrix, aij, compressed row, sparse, parallel
2525 
2526 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2527 @*/
2528 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2529 {
2530   int ierr,size;
2531 
2532   PetscFunctionBegin;
2533   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2534   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2535   if (size > 1) {
2536     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2537     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2538   } else {
2539     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2540     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2541   }
2542   PetscFunctionReturn(0);
2543 }
2544 
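/*
   Usage sketch (illustrative): the convenience routine above combines MatCreate(),
   MatSetType() and the preallocation call; on a one-process communicator it
   produces a SEQAIJ matrix instead, as noted in the manual page.

     Mat A;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
       ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
     ierr = MatDestroy(A);CHKERRQ(ierr);
*/
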
2545 #undef __FUNCT__
2546 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2547 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2548 {
2549   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2550   PetscFunctionBegin;
2551   *Ad     = a->A;
2552   *Ao     = a->B;
2553   *colmap = a->garray;
2554   PetscFunctionReturn(0);
2555 }
2556 
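/*
   Usage sketch (illustrative): accessing the two sequential blocks and the
   local-to-global column map of the off-diagonal part from the routine above.
   Column j of the off-diagonal block Ao corresponds to global column garray[j].

     Mat A,Ad,Ao;
     int *garray;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
*/
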
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2559 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2560 {
2561   int        ierr,i;
2562   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2563 
2564   PetscFunctionBegin;
2565   if (coloring->ctype == IS_COLORING_LOCAL) {
2566     ISColoringValue *allcolors,*colors;
2567     ISColoring      ocoloring;
2568 
2569     /* set coloring for diagonal portion */
2570     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2571 
2572     /* set coloring for off-diagonal portion */
2573     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2574     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2575     for (i=0; i<a->B->n; i++) {
2576       colors[i] = allcolors[a->garray[i]];
2577     }
2578     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2579     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2580     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2581     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2582   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2583     ISColoringValue *colors;
2584     int             *larray;
2585     ISColoring      ocoloring;
2586 
2587     /* set coloring for diagonal portion */
2588     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2589     for (i=0; i<a->A->n; i++) {
2590       larray[i] = i + a->cstart;
2591     }
2592     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2593     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2594     for (i=0; i<a->A->n; i++) {
2595       colors[i] = coloring->colors[larray[i]];
2596     }
2597     ierr = PetscFree(larray);CHKERRQ(ierr);
2598     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2599     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2600     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2601 
2602     /* set coloring for off-diagonal portion */
2603     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2604     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2605     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2606     for (i=0; i<a->B->n; i++) {
2607       colors[i] = coloring->colors[larray[i]];
2608     }
2609     ierr = PetscFree(larray);CHKERRQ(ierr);
2610     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2611     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2612     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2613   } else {
2614     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2615   }
2616 
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 #undef __FUNCT__
2621 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2622 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2623 {
2624   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2625   int        ierr;
2626 
2627   PetscFunctionBegin;
2628   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2629   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 #undef __FUNCT__
2634 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2635 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2636 {
2637   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2638   int        ierr;
2639 
2640   PetscFunctionBegin;
2641   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2642   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2643   PetscFunctionReturn(0);
2644 }
2645 
2646 #undef __FUNCT__
2647 #define __FUNCT__ "MatMerge"
2648 /*@C
2649       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2650                  matrices from each processor
2651 
2652     Collective on MPI_Comm
2653 
2654    Input Parameters:
2655 +    comm - the communicator the parallel matrix will live on
2656 -    inmat - the input sequential matrices
2657 
2658    Output Parameter:
2659 .    outmat - the parallel matrix generated
2660 
2661     Level: advanced
2662 
2663    Notes: The number of columns in EACH of the separate input matrices
2664       MUST be the same.
2665 
2666 @*/
2667 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2668 {
2669   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2670   PetscScalar *values;
2671   PetscMap    columnmap,rowmap;
2672 
2673   PetscFunctionBegin;
2674 
2675   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2676 
2677   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2678   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2679   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2680   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2681   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2682   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2683 
2684   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2685   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2686   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2687   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2688   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2689 
2690   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2691   for (i=0;i<m;i++) {
2692     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2693     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2694     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2695   }
2696   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2697   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2698 
2699   for (i=0;i<m;i++) {
2700     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2701     I    = i + rstart;
2702     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2703     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2704   }
2705   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2706   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2707   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2708 
2709   PetscFunctionReturn(0);
2710 }
2711 
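/*
   Usage sketch (illustrative): each process supplies its own sequential matrix,
   all with the same number of columns, and MatMerge() stacks them row-wise into
   one parallel matrix; note that the routine above destroys the input matrix.

     Mat local;                            SeqAIJ matrix holding this process's rows
     Mat global;
     ierr = MatMerge(PETSC_COMM_WORLD,local,&global);CHKERRQ(ierr);
*/
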
2712 #undef __FUNCT__
2713 #define __FUNCT__ "MatFileSplit"
2714 int MatFileSplit(Mat A,char *outfile)
2715 {
2716   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2717   PetscViewer out;
2718   char        *name;
2719   Mat         B;
2720   PetscScalar *values;
2721 
2722   PetscFunctionBegin;
2723 
2724   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2725   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2726   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2727   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2728   for (i=0;i<m;i++) {
2729     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2730     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2731     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2732   }
2733   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2734   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2735 
2736   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2737   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2738   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2739   sprintf(name,"%s.%d",outfile,rank);
2740   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2741   ierr = PetscFree(name);CHKERRQ(ierr);
2742   ierr = MatView(B,out);CHKERRQ(ierr);
2743   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2744   ierr = MatDestroy(B);CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
2747
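/*
   Usage sketch (illustrative): MatFileSplit() above writes each process's rows of
   A as a separate sequential matrix to its own binary file, named by appending
   the rank to the given prefix.

     ierr = MatFileSplit(A,"amat");CHKERRQ(ierr);    produces amat.0, amat.1, ... one file per process
*/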