1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
18 a slightly higher hash table cost; without it, it is not scalable (each processor
19 has an order N integer array) but is fast to access.
20 */
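/*
  A sketch of the mapping, for illustration only (hypothetical values): if the
  off-diagonal block B references global columns garray[] = {3,7,12}, then the
  colmap records colmap[3] = 1, colmap[7] = 2, colmap[12] = 3.  The entries are
  stored 1-based so that a value of 0 means "this global column is not present
  in the off-diagonal part on this process".
*/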
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
42 
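/*
  The two macros below insert a single value into the "diagonal" (A) or
  "off-diagonal" (B) SeqAIJ block.  A short binary search narrows the range
  within the row, then a linear scan locates the column.  If the entry is new
  and the row is already full, the row is given CHUNKSIZE additional slots:
  a fresh single malloc holds the new i, j and a arrays, the old data is
  copied over, the old storage is freed, and only then is the value inserted.
*/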
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
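/*
  MatSetValues_MPIAIJ() routes each (row,col) entry as follows:
    - rows owned by this process go into the diagonal block A (columns in
      [cstart,cend)) or into the off-diagonal block B (all other columns,
      translated to local numbering through colmap/garray);
    - rows owned by other processes are stashed and only communicated during
      MatAssemblyBegin()/MatAssemblyEnd().

  A hedged sketch of typical application usage (grow and gcol are hypothetical
  global indices):

      PetscScalar v = 1.0;
      ierr = MatSetValues(mat,1,&grow,1,&gcol,&v,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/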
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required by the macros */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift=0;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
289     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
294         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col --;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
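/*
  Assembly proceeds in two phases: MatAssemblyBegin_MPIAIJ() checks that all
  processes use the same InsertMode and starts scattering the stashed
  off-process entries; MatAssemblyEnd_MPIAIJ() receives those entries, applies
  them through MatSetValues_MPIAIJ(), and then assembles the two local SeqAIJ
  blocks.
*/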
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
353   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 
359   PetscFunctionBegin;
360   if (!aij->donotstash) {
361     while (1) {
362       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
363       if (!flg) break;
364 
365       for (i=0; i<n;) {
366         /* Now identify the consecutive vals belonging to the same row */
367         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
368         if (j < n) ncols = j-i;
369         else       ncols = n-i;
370         /* Now assemble all these values with a single function call */
371         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
372         i = j;
373       }
374     }
375     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
376   }
377 
378   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
379   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
380 
381   /* determine if any processor has disassembled, if so we must
382      also disassemble ourselves, in order that we may reassemble. */
383   /*
384      if the nonzero structure of submatrix B cannot change then we know that
385      no processor disassembled, thus we can skip this check
386   */
387   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
388     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
389     if (mat->was_assembled && !other_disassembled) {
390       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
391     }
392   }
393 
394   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
395     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
396   }
397   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
398   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
399 
400   if (aij->rowvalues) {
401     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
402     aij->rowvalues = 0;
403   }
404 
405   /* used by MatAXPY() */
406   a->xtoy = 0; b->xtoy = 0;
407   a->XtoY = 0; b->XtoY = 0;
408 
409   PetscFunctionReturn(0);
410 }
411 
412 #undef __FUNCT__
413 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
414 int MatZeroEntries_MPIAIJ(Mat A)
415 {
416   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
417   int        ierr;
418 
419   PetscFunctionBegin;
420   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
421   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
422   PetscFunctionReturn(0);
423 }
424 
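/*
  MatZeroRows_MPIAIJ(): since the rows listed in "is" may be owned by other
  processes, each process first determines the owner of every requested row,
  sends the row numbers to their owners (MPI_Isend/MPI_Irecv), collects the
  rows it owns into lrows[], and then zeros those rows locally, optionally
  inserting the supplied diagonal value.
*/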
425 #undef __FUNCT__
426 #define __FUNCT__ "MatZeroRows_MPIAIJ"
427 int MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
428 {
429   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
430   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
431   int            *nprocs,j,idx,nsends,row;
432   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
433   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
434   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
435   MPI_Comm       comm = A->comm;
436   MPI_Request    *send_waits,*recv_waits;
437   MPI_Status     recv_status,*send_status;
438   IS             istmp;
439   PetscTruth     found;
440 
441   PetscFunctionBegin;
442   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
443   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
444 
445   /*  first count number of contributors to each processor */
446   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
447   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
448   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
449   for (i=0; i<N; i++) {
450     idx = rows[i];
451     found = PETSC_FALSE;
452     for (j=0; j<size; j++) {
453       if (idx >= owners[j] && idx < owners[j+1]) {
454         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
455       }
456     }
457     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
458   }
459   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
460 
461   /* inform other processors of number of messages and max length*/
462   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
463 
464   /* post receives:   */
465   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
466   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
467   for (i=0; i<nrecvs; i++) {
468     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
469   }
470 
471   /* do sends:
472       1) starts[i] gives the starting index in svalues for stuff going to
473          the ith processor
474   */
475   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
476   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
477   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
478   starts[0] = 0;
479   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
480   for (i=0; i<N; i++) {
481     svalues[starts[owner[i]]++] = rows[i];
482   }
483   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
484 
485   starts[0] = 0;
486   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
487   count = 0;
488   for (i=0; i<size; i++) {
489     if (nprocs[2*i+1]) {
490       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
491     }
492   }
493   ierr = PetscFree(starts);CHKERRQ(ierr);
494 
495   base = owners[rank];
496 
497   /*  wait on receives */
498   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
499   source = lens + nrecvs;
500   count  = nrecvs; slen = 0;
501   while (count) {
502     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
503     /* unpack receives into our local space */
504     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
505     source[imdex]  = recv_status.MPI_SOURCE;
506     lens[imdex]    = n;
507     slen          += n;
508     count--;
509   }
510   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
511 
512   /* move the data into the send scatter */
513   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
514   count = 0;
515   for (i=0; i<nrecvs; i++) {
516     values = rvalues + i*nmax;
517     for (j=0; j<lens[i]; j++) {
518       lrows[count++] = values[j] - base;
519     }
520   }
521   ierr = PetscFree(rvalues);CHKERRQ(ierr);
522   ierr = PetscFree(lens);CHKERRQ(ierr);
523   ierr = PetscFree(owner);CHKERRQ(ierr);
524   ierr = PetscFree(nprocs);CHKERRQ(ierr);
525 
526   /* actually zap the local rows */
527   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
528   PetscLogObjectParent(A,istmp);
529 
530   /*
531         Zero the required rows. If the "diagonal block" of the matrix
532      is square and the user wishes to set the diagonal we use separate
533      code so that MatSetValues() is not called for each diagonal entry;
534      that would allocate new memory, causing lots of mallocs and slowing things down.
535 
536        Contributed by: Mathew Knepley
537   */
538   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
539   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
540   if (diag && (l->A->M == l->A->N)) {
541     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
542   } else if (diag) {
543     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
544     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
545       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
546 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
547     }
548     for (i = 0; i < slen; i++) {
549       row  = lrows[i] + rstart;
550       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
551     }
552     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
553     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
554   } else {
555     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
556   }
557   ierr = ISDestroy(istmp);CHKERRQ(ierr);
558   ierr = PetscFree(lrows);CHKERRQ(ierr);
559 
560   /* wait on sends */
561   if (nsends) {
562     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
563     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
564     ierr = PetscFree(send_status);CHKERRQ(ierr);
565   }
566   ierr = PetscFree(send_waits);CHKERRQ(ierr);
567   ierr = PetscFree(svalues);CHKERRQ(ierr);
568 
569   PetscFunctionReturn(0);
570 }
571 
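/*
  MatMult_MPIAIJ() computes yy = A*xx (the local diagonal block) plus
  B*lvec (the off-diagonal block applied to the ghost values of xx); the
  scatter that gathers the ghost values is overlapped with the local multiply.
*/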
572 #undef __FUNCT__
573 #define __FUNCT__ "MatMult_MPIAIJ"
574 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
575 {
576   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
577   int        ierr,nt;
578 
579   PetscFunctionBegin;
580   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
581   if (nt != A->n) {
582     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
583   }
584   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
585   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
586   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
587   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
588   PetscFunctionReturn(0);
589 }
590 
591 #undef __FUNCT__
592 #define __FUNCT__ "MatMultAdd_MPIAIJ"
593 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
594 {
595   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
596   int        ierr;
597 
598   PetscFunctionBegin;
599   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
600   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
601   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
602   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
603   PetscFunctionReturn(0);
604 }
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
608 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
609 {
610   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
611   int        ierr;
612 
613   PetscFunctionBegin;
614   /* do nondiagonal part */
615   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
616   /* send it on its way */
617   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
618   /* do local part */
619   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
620   /* receive remote parts: note this assumes the values are not actually */
621   /* inserted in yy until the next line, which is true for my implementation */
622   /* but may not always be true. */
623   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
624   PetscFunctionReturn(0);
625 }
626 
627 EXTERN_C_BEGIN
628 #undef __FUNCT__
629 #define __FUNCT__ "MatIsSymmetric_MPIAIJ"
630 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
631 {
632   MPI_Comm comm;
633   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
634   Mat        Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
635   IS         Me,Notme;
636   int        M,N,first,last,*notme,ntids,i, ierr;
637 
638   PetscFunctionBegin;
639 
640   /* Easy test: symmetric diagonal block */
641   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
642   ierr = MatIsSymmetric(Adia,Bdia,f); CHKERRQ(ierr);
643   if (!*f) PetscFunctionReturn(0);
644   ierr = PetscObjectGetComm((PetscObject)Amat,&comm); CHKERRQ(ierr);
645   ierr = MPI_Comm_size(comm,&ntids); CHKERRQ(ierr);
646   if (ntids==1) PetscFunctionReturn(0);
647 
648   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
649   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
650   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
651   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
652   for (i=0; i<first; i++) notme[i] = i;
653   for (i=last; i<M; i++) notme[i-last+first] = i;
654   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
655   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
656   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs); CHKERRQ(ierr);
657   Aoff = Aoffs[0];
658   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs); CHKERRQ(ierr);
659   Boff = Boffs[0];
660   ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
661   ierr = MatDestroyMatrices(1,&Aoffs); CHKERRQ(ierr);
662   ierr = MatDestroyMatrices(1,&Boffs); CHKERRQ(ierr);
663   ierr = ISDestroy(Me); CHKERRQ(ierr);
664   ierr = ISDestroy(Notme); CHKERRQ(ierr);
665 
666   PetscFunctionReturn(0);
667 }
668 EXTERN_C_END
669 
670 #undef __FUNCT__
671 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
672 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
673 {
674   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
675   int        ierr;
676 
677   PetscFunctionBegin;
678   /* do nondiagonal part */
679   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
680   /* send it on its way */
681   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
682   /* do local part */
683   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
684   /* receive remote parts: note this assumes the values are not actually */
685   /* inserted in zz until the next line, which is true for my implementation */
686   /* but may not always be true. */
687   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 /*
692   This only works correctly for square matrices where the subblock A->A is the
693    diagonal block
694 */
695 #undef __FUNCT__
696 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
697 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
698 {
699   int        ierr;
700   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
701 
702   PetscFunctionBegin;
703   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
704   if (a->rstart != a->cstart || a->rend != a->cend) {
705     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
706   }
707   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatScale_MPIAIJ"
713 int MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
714 {
715   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
716   int        ierr;
717 
718   PetscFunctionBegin;
719   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
720   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
721   PetscFunctionReturn(0);
722 }
723 
724 #undef __FUNCT__
725 #define __FUNCT__ "MatDestroy_MPIAIJ"
726 int MatDestroy_MPIAIJ(Mat mat)
727 {
728   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
729   int        ierr;
730 
731   PetscFunctionBegin;
732 #if defined(PETSC_USE_LOG)
733   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
734 #endif
735   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
736   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
737   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
738   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
739 #if defined (PETSC_USE_CTABLE)
740   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
741 #else
742   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
743 #endif
744   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
745   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
746   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
747   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
748   ierr = PetscFree(aij);CHKERRQ(ierr);
749   PetscFunctionReturn(0);
750 }
751 
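/*
  MatView_MPIAIJ_Binary() writes the matrix to a binary viewer in the usual
  PETSc AIJ file layout: a 4-int header (cookie, M, N, total nonzeros),
  followed by all row lengths, then all global column indices, then all
  values.  Process 0 does the actual writing; the other processes send it
  their row lengths, indices and values one process at a time.
*/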
752 #undef __FUNCT__
753 #define __FUNCT__ "MatView_MPIAIJ_Binary"
754 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
755 {
756   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
757   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
758   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
759   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
760   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
761   PetscScalar       *column_values;
762 
763   PetscFunctionBegin;
764   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
765   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
766   nz   = A->nz + B->nz;
767   if (rank == 0) {
768     header[0] = MAT_FILE_COOKIE;
769     header[1] = mat->M;
770     header[2] = mat->N;
771     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
772     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
773     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
774     /* get largest number of rows any processor has */
775     rlen = mat->m;
776     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
777     for (i=1; i<size; i++) {
778       rlen = PetscMax(rlen,range[i+1] - range[i]);
779     }
780   } else {
781     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
782     rlen = mat->m;
783   }
784 
785   /* load up the local row counts */
786   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
787   for (i=0; i<mat->m; i++) {
788     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
789   }
790 
791   /* store the row lengths to the file */
792   if (rank == 0) {
793     MPI_Status status;
794     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
795     for (i=1; i<size; i++) {
796       rlen = range[i+1] - range[i];
797       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
798       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
799     }
800   } else {
801     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
802   }
803   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
804 
805   /* load up the local column indices */
806   nzmax = nz; /* process 0 needs a buffer as large as the largest nz on any process */
807   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
808   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
809   cnt  = 0;
810   for (i=0; i<mat->m; i++) {
811     for (j=B->i[i]; j<B->i[i+1]; j++) {
812       if ( (col = garray[B->j[j]]) > cstart) break;
813       column_indices[cnt++] = col;
814     }
815     for (k=A->i[i]; k<A->i[i+1]; k++) {
816       column_indices[cnt++] = A->j[k] + cstart;
817     }
818     for (; j<B->i[i+1]; j++) {
819       column_indices[cnt++] = garray[B->j[j]];
820     }
821   }
822   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
823 
824   /* store the column indices to the file */
825   if (rank == 0) {
826     MPI_Status status;
827     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
828     for (i=1; i<size; i++) {
829       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
830       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
831       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
832       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
833     }
834   } else {
835     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
836     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
837   }
838   ierr = PetscFree(column_indices);CHKERRQ(ierr);
839 
840   /* load up the local column values */
841   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
842   cnt  = 0;
843   for (i=0; i<mat->m; i++) {
844     for (j=B->i[i]; j<B->i[i+1]; j++) {
845       if ( garray[B->j[j]] > cstart) break;
846       column_values[cnt++] = B->a[j];
847     }
848     for (k=A->i[i]; k<A->i[i+1]; k++) {
849       column_values[cnt++] = A->a[k];
850     }
851     for (; j<B->i[i+1]; j++) {
852       column_values[cnt++] = B->a[j];
853     }
854   }
855   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
856 
857   /* store the column values to the file */
858   if (rank == 0) {
859     MPI_Status status;
860     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
861     for (i=1; i<size; i++) {
862       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
863       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
864       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
865       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
866     }
867   } else {
868     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
869     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
870   }
871   ierr = PetscFree(column_values);CHKERRQ(ierr);
872   PetscFunctionReturn(0);
873 }
874 
875 #undef __FUNCT__
876 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
877 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
878 {
879   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
880   int               ierr,rank = aij->rank,size = aij->size;
881   PetscTruth        isdraw,isascii,flg,isbinary;
882   PetscViewer       sviewer;
883   PetscViewerFormat format;
884 
885   PetscFunctionBegin;
886   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
887   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
888   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
889   if (isascii) {
890     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
891     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
892       MatInfo info;
893       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
894       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
895       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
896       if (flg) {
897         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
898 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
899       } else {
900         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
901 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
902       }
903       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
904       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
905       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
906       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
907       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
908       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
909       PetscFunctionReturn(0);
910     } else if (format == PETSC_VIEWER_ASCII_INFO) {
911       PetscFunctionReturn(0);
912     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
913       PetscFunctionReturn(0);
914     }
915   } else if (isbinary) {
916     if (size == 1) {
917       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
918       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
919     } else {
920       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
921     }
922     PetscFunctionReturn(0);
923   } else if (isdraw) {
924     PetscDraw  draw;
925     PetscTruth isnull;
926     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
927     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
928   }
929 
930   if (size == 1) {
931     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
932     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
933   } else {
934     /* assemble the entire matrix onto first processor. */
935     Mat         A;
936     Mat_SeqAIJ *Aloc;
937     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
938     PetscScalar *a;
939 
940     if (!rank) {
941       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
942     } else {
943       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
944     }
945     PetscLogObjectParent(mat,A);
946 
947     /* copy over the A part */
948     Aloc = (Mat_SeqAIJ*)aij->A->data;
949     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
950     row = aij->rstart;
951     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
952     for (i=0; i<m; i++) {
953       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
954       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
955     }
956     aj = Aloc->j;
957     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
958 
959     /* copy over the B part */
960     Aloc = (Mat_SeqAIJ*)aij->B->data;
961     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
962     row  = aij->rstart;
963     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
964     ct   = cols;
965     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
966     for (i=0; i<m; i++) {
967       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
968       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
969     }
970     ierr = PetscFree(ct);CHKERRQ(ierr);
971     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
972     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
973     /*
974        Everyone has to call to draw the matrix since the graphics waits are
975        synchronized across all processors that share the PetscDraw object
976     */
977     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
978     if (!rank) {
979       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
980       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
981     }
982     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
983     ierr = MatDestroy(A);CHKERRQ(ierr);
984   }
985   PetscFunctionReturn(0);
986 }
987 
988 #undef __FUNCT__
989 #define __FUNCT__ "MatView_MPIAIJ"
990 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
991 {
992   int        ierr;
993   PetscTruth isascii,isdraw,issocket,isbinary;
994 
995   PetscFunctionBegin;
996   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
997   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
998   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
999   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1000   if (isascii || isdraw || isbinary || issocket) {
1001     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1002   } else {
1003     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1004   }
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 
1009 
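/*
  MatRelax_MPIAIJ() supports only the "local" SOR variants: when
  SOR_ZERO_INITIAL_GUESS is set, the first sweep relaxes the diagonal block A
  directly with bb; every subsequent sweep first gathers the ghost values of
  xx, forms the modified right-hand side bb1 = bb - B*x_ghost, and then
  applies the requested SeqAIJ relaxation to A with bb1.
*/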
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatRelax_MPIAIJ"
1012 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1013 {
1014   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1015   int          ierr;
1016   Vec          bb1;
1017   PetscScalar  mone=-1.0;
1018 
1019   PetscFunctionBegin;
1020   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1021 
1022   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1023 
1024   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1025     if (flag & SOR_ZERO_INITIAL_GUESS) {
1026       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1027       its--;
1028     }
1029 
1030     while (its--) {
1031       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1032       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1033 
1034       /* update rhs: bb1 = bb - B*x */
1035       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1036       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1037 
1038       /* local sweep */
1039       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1040       CHKERRQ(ierr);
1041     }
1042   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1043     if (flag & SOR_ZERO_INITIAL_GUESS) {
1044       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1045       its--;
1046     }
1047     while (its--) {
1048       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1049       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1050 
1051       /* update rhs: bb1 = bb - B*x */
1052       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1053       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1054 
1055       /* local sweep */
1056       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1057       CHKERRQ(ierr);
1058     }
1059   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1060     if (flag & SOR_ZERO_INITIAL_GUESS) {
1061       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1062       its--;
1063     }
1064     while (its--) {
1065       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1066       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1067 
1068       /* update rhs: bb1 = bb - B*x */
1069       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1070       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1071 
1072       /* local sweep */
1073       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1074       CHKERRQ(ierr);
1075     }
1076   } else {
1077     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1078   }
1079 
1080   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1086 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1087 {
1088   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1089   Mat        A = mat->A,B = mat->B;
1090   int        ierr;
1091   PetscReal  isend[5],irecv[5];
1092 
1093   PetscFunctionBegin;
1094   info->block_size     = 1.0;
1095   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1096   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1097   isend[3] = info->memory;  isend[4] = info->mallocs;
1098   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1099   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1100   isend[3] += info->memory;  isend[4] += info->mallocs;
1101   if (flag == MAT_LOCAL) {
1102     info->nz_used      = isend[0];
1103     info->nz_allocated = isend[1];
1104     info->nz_unneeded  = isend[2];
1105     info->memory       = isend[3];
1106     info->mallocs      = isend[4];
1107   } else if (flag == MAT_GLOBAL_MAX) {
1108     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1109     info->nz_used      = irecv[0];
1110     info->nz_allocated = irecv[1];
1111     info->nz_unneeded  = irecv[2];
1112     info->memory       = irecv[3];
1113     info->mallocs      = irecv[4];
1114   } else if (flag == MAT_GLOBAL_SUM) {
1115     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1116     info->nz_used      = irecv[0];
1117     info->nz_allocated = irecv[1];
1118     info->nz_unneeded  = irecv[2];
1119     info->memory       = irecv[3];
1120     info->mallocs      = irecv[4];
1121   }
1122   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1123   info->fill_ratio_needed = 0;
1124   info->factor_mallocs    = 0;
1125   info->rows_global       = (double)matin->M;
1126   info->columns_global    = (double)matin->N;
1127   info->rows_local        = (double)matin->m;
1128   info->columns_local     = (double)matin->N;
1129 
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatSetOption_MPIAIJ"
1135 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1136 {
1137   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1138   int        ierr;
1139 
1140   PetscFunctionBegin;
1141   switch (op) {
1142   case MAT_NO_NEW_NONZERO_LOCATIONS:
1143   case MAT_YES_NEW_NONZERO_LOCATIONS:
1144   case MAT_COLUMNS_UNSORTED:
1145   case MAT_COLUMNS_SORTED:
1146   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1147   case MAT_KEEP_ZEROED_ROWS:
1148   case MAT_NEW_NONZERO_LOCATION_ERR:
1149   case MAT_USE_INODES:
1150   case MAT_DO_NOT_USE_INODES:
1151   case MAT_IGNORE_ZERO_ENTRIES:
1152     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1153     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1154     break;
1155   case MAT_ROW_ORIENTED:
1156     a->roworiented = PETSC_TRUE;
1157     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1158     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1159     break;
1160   case MAT_ROWS_SORTED:
1161   case MAT_ROWS_UNSORTED:
1162   case MAT_YES_NEW_DIAGONALS:
1163     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1164     break;
1165   case MAT_COLUMN_ORIENTED:
1166     a->roworiented = PETSC_FALSE;
1167     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1168     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1169     break;
1170   case MAT_IGNORE_OFF_PROC_ENTRIES:
1171     a->donotstash = PETSC_TRUE;
1172     break;
1173   case MAT_NO_NEW_DIAGONALS:
1174     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1175   default:
1176     SETERRQ(PETSC_ERR_SUP,"unknown option");
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatGetRow_MPIAIJ"
1183 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1184 {
1185   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1186   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1187   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1188   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1189   int          *cmap,*idx_p;
1190 
1191   PetscFunctionBegin;
1192   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1193   mat->getrowactive = PETSC_TRUE;
1194 
1195   if (!mat->rowvalues && (idx || v)) {
1196     /*
1197         allocate enough space to hold information from the longest row.
1198     */
1199     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1200     int     max = 1,tmp;
1201     for (i=0; i<matin->m; i++) {
1202       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1203       if (max < tmp) { max = tmp; }
1204     }
1205     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1206     mat->rowindices = (int*)(mat->rowvalues + max);
1207   }
1208 
1209   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1210   lrow = row - rstart;
1211 
1212   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1213   if (!v)   {pvA = 0; pvB = 0;}
1214   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1215   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1216   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1217   nztot = nzA + nzB;
1218 
1219   cmap  = mat->garray;
1220   if (v  || idx) {
1221     if (nztot) {
1222       /* Sort by increasing column numbers, assuming A and B already sorted */
1223       int imark = -1;
1224       if (v) {
1225         *v = v_p = mat->rowvalues;
1226         for (i=0; i<nzB; i++) {
1227           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1228           else break;
1229         }
1230         imark = i;
1231         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1232         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1233       }
1234       if (idx) {
1235         *idx = idx_p = mat->rowindices;
1236         if (imark > -1) {
1237           for (i=0; i<imark; i++) {
1238             idx_p[i] = cmap[cworkB[i]];
1239           }
1240         } else {
1241           for (i=0; i<nzB; i++) {
1242             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1243             else break;
1244           }
1245           imark = i;
1246         }
1247         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1248         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1249       }
1250     } else {
1251       if (idx) *idx = 0;
1252       if (v)   *v   = 0;
1253     }
1254   }
1255   *nz = nztot;
1256   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1257   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1263 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1264 {
1265   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1266 
1267   PetscFunctionBegin;
1268   if (aij->getrowactive == PETSC_FALSE) {
1269     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1270   }
1271   aij->getrowactive = PETSC_FALSE;
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatNorm_MPIAIJ"
1277 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1278 {
1279   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1280   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1281   int          ierr,i,j,cstart = aij->cstart;
1282   PetscReal    sum = 0.0;
1283   PetscScalar  *v;
1284 
1285   PetscFunctionBegin;
1286   if (aij->size == 1) {
1287     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1288   } else {
1289     if (type == NORM_FROBENIUS) {
1290       v = amat->a;
1291       for (i=0; i<amat->nz; i++) {
1292 #if defined(PETSC_USE_COMPLEX)
1293         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1294 #else
1295         sum += (*v)*(*v); v++;
1296 #endif
1297       }
1298       v = bmat->a;
1299       for (i=0; i<bmat->nz; i++) {
1300 #if defined(PETSC_USE_COMPLEX)
1301         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1302 #else
1303         sum += (*v)*(*v); v++;
1304 #endif
1305       }
1306       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1307       *norm = sqrt(*norm);
1308     } else if (type == NORM_1) { /* max column norm */
1309       PetscReal *tmp,*tmp2;
1310       int    *jj,*garray = aij->garray;
1311       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1312       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1313       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1314       *norm = 0.0;
1315       v = amat->a; jj = amat->j;
1316       for (j=0; j<amat->nz; j++) {
1317         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1318       }
1319       v = bmat->a; jj = bmat->j;
1320       for (j=0; j<bmat->nz; j++) {
1321         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1322       }
1323       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1324       for (j=0; j<mat->N; j++) {
1325         if (tmp2[j] > *norm) *norm = tmp2[j];
1326       }
1327       ierr = PetscFree(tmp);CHKERRQ(ierr);
1328       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1329     } else if (type == NORM_INFINITY) { /* max row norm */
1330       PetscReal ntemp = 0.0;
1331       for (j=0; j<aij->A->m; j++) {
1332         v = amat->a + amat->i[j];
1333         sum = 0.0;
1334         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1335           sum += PetscAbsScalar(*v); v++;
1336         }
1337         v = bmat->a + bmat->i[j];
1338         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1339           sum += PetscAbsScalar(*v); v++;
1340         }
1341         if (sum > ntemp) ntemp = sum;
1342       }
1343       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1344     } else {
1345       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1346     }
1347   }
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatTranspose_MPIAIJ"
1353 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1354 {
1355   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1356   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1357   int          ierr;
1358   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1359   Mat          B;
1360   PetscScalar  *array;
1361 
1362   PetscFunctionBegin;
1363   if (!matout && M != N) {
1364     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1365   }
1366 
1367   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1368 
1369   /* copy over the A part */
1370   Aloc = (Mat_SeqAIJ*)a->A->data;
1371   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1372   row = a->rstart;
1373   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1374   for (i=0; i<m; i++) {
1375     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1376     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1377   }
1378   aj = Aloc->j;
1379   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1380 
1381   /* copy over the B part */
1382   Aloc = (Mat_SeqAIJ*)a->B->data;
1383   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1384   row  = a->rstart;
1385   ierr = PetscMalloc((1+ai[m])*sizeof(int),&cols);CHKERRQ(ierr);
1386   ct   = cols;
1387   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1388   for (i=0; i<m; i++) {
1389     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1390     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1391   }
1392   ierr = PetscFree(ct);CHKERRQ(ierr);
1393   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1394   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1395   if (matout) {
1396     *matout = B;
1397   } else {
1398     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1405 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1406 {
1407   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1408   Mat        a = aij->A,b = aij->B;
1409   int        ierr,s1,s2,s3;
1410 
1411   PetscFunctionBegin;
1412   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1413   if (rr) {
1414     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1415     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1416     /* Overlap communication with computation. */
1417     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1418   }
1419   if (ll) {
1420     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1421     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1422     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1423   }
1424   /* scale  the diagonal block */
1425   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1426 
1427   if (rr) {
1428     /* Do a scatter end and then right scale the off-diagonal block */
1429     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1430     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1431   }
1432 
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1439 int MatPrintHelp_MPIAIJ(Mat A)
1440 {
1441   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1442   int        ierr;
1443 
1444   PetscFunctionBegin;
1445   if (!a->rank) {
1446     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1447   }
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1453 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1454 {
1455   PetscFunctionBegin;
1456   *bs = 1;
1457   PetscFunctionReturn(0);
1458 }
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1461 int MatSetUnfactored_MPIAIJ(Mat A)
1462 {
1463   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1464   int        ierr;
1465 
1466   PetscFunctionBegin;
1467   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatEqual_MPIAIJ"
1473 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1474 {
1475   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1476   Mat        a,b,c,d;
1477   PetscTruth flg;
1478   int        ierr;
1479 
1480   PetscFunctionBegin;
1481   a = matA->A; b = matA->B;
1482   c = matB->A; d = matB->B;
1483 
1484   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1485   if (flg == PETSC_TRUE) {
1486     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1487   }
1488   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1489   PetscFunctionReturn(0);
1490 }
1491 
1492 #undef __FUNCT__
1493 #define __FUNCT__ "MatCopy_MPIAIJ"
1494 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1495 {
1496   int        ierr;
1497   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1498   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1499 
1500   PetscFunctionBegin;
1501   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1502   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1503     /* because of the column compression in the off-processor part of the matrix a->B,
1504        the number of columns in a->B and b->B may be different, hence we cannot call
1505        MatCopy() directly on the two parts. If need be, we could provide a copy more
1506        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1507        then copying the submatrices */
1508     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1509   } else {
1510     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1511     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1512   }
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1518 int MatSetUpPreallocation_MPIAIJ(Mat A)
1519 {
1520   int        ierr;
1521 
1522   PetscFunctionBegin;
1523   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1528 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS [],int);
1529 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1530 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
1531 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1532 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1533 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1534 #endif
1535 
1536 #include "petscblaslapack.h"
1537 extern int MatAXPY_SeqAIJ(const PetscScalar[],Mat,Mat,MatStructure);
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatAXPY_MPIAIJ"
1540 int MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1541 {
1542   int        ierr,one=1,i;
1543   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1544   Mat_SeqAIJ *x,*y;
1545 
1546   PetscFunctionBegin;
1547   if (str == SAME_NONZERO_PATTERN) {
1548     x = (Mat_SeqAIJ *)xx->A->data;
1549     y = (Mat_SeqAIJ *)yy->A->data;
1550     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1551     x = (Mat_SeqAIJ *)xx->B->data;
1552     y = (Mat_SeqAIJ *)yy->B->data;
1553     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1554   } else if (str == SUBSET_NONZERO_PATTERN) {
1555     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1556 
1557     x = (Mat_SeqAIJ *)xx->B->data;
1558     y = (Mat_SeqAIJ *)yy->B->data;
1559     if (y->xtoy && y->XtoY != xx->B) {
1560       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1561       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1562     }
1563     if (!y->xtoy) { /* get xtoy */
1564       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1565       y->XtoY = xx->B;
1566     }
1567     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1568   } else {
1569     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1570   }
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 /* -------------------------------------------------------------------*/
1575 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1576        MatGetRow_MPIAIJ,
1577        MatRestoreRow_MPIAIJ,
1578        MatMult_MPIAIJ,
1579        MatMultAdd_MPIAIJ,
1580        MatMultTranspose_MPIAIJ,
1581        MatMultTransposeAdd_MPIAIJ,
1582        0,
1583        0,
1584        0,
1585        0,
1586        0,
1587        0,
1588        MatRelax_MPIAIJ,
1589        MatTranspose_MPIAIJ,
1590        MatGetInfo_MPIAIJ,
1591        MatEqual_MPIAIJ,
1592        MatGetDiagonal_MPIAIJ,
1593        MatDiagonalScale_MPIAIJ,
1594        MatNorm_MPIAIJ,
1595        MatAssemblyBegin_MPIAIJ,
1596        MatAssemblyEnd_MPIAIJ,
1597        0,
1598        MatSetOption_MPIAIJ,
1599        MatZeroEntries_MPIAIJ,
1600        MatZeroRows_MPIAIJ,
1601 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1602        MatLUFactorSymbolic_MPIAIJ_TFS,
1603 #else
1604        0,
1605 #endif
1606        0,
1607        0,
1608        0,
1609        MatSetUpPreallocation_MPIAIJ,
1610        0,
1611        0,
1612        0,
1613        0,
1614        MatDuplicate_MPIAIJ,
1615        0,
1616        0,
1617        0,
1618        0,
1619        MatAXPY_MPIAIJ,
1620        MatGetSubMatrices_MPIAIJ,
1621        MatIncreaseOverlap_MPIAIJ,
1622        MatGetValues_MPIAIJ,
1623        MatCopy_MPIAIJ,
1624        MatPrintHelp_MPIAIJ,
1625        MatScale_MPIAIJ,
1626        0,
1627        0,
1628        0,
1629        MatGetBlockSize_MPIAIJ,
1630        0,
1631        0,
1632        0,
1633        0,
1634        MatFDColoringCreate_MPIAIJ,
1635        0,
1636        MatSetUnfactored_MPIAIJ,
1637        0,
1638        0,
1639        MatGetSubMatrix_MPIAIJ,
1640        MatDestroy_MPIAIJ,
1641        MatView_MPIAIJ,
1642        MatGetPetscMaps_Petsc,
1643        0,
1644        0,
1645        0,
1646        0,
1647        0,
1648        0,
1649        0,
1650        0,
1651        MatSetColoring_MPIAIJ,
1652        MatSetValuesAdic_MPIAIJ,
1653        MatSetValuesAdifor_MPIAIJ
1654 };
1655 
1656 /* ----------------------------------------------------------------------------------------*/
1657 
1658 EXTERN_C_BEGIN
1659 #undef __FUNCT__
1660 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1661 int MatStoreValues_MPIAIJ(Mat mat)
1662 {
1663   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1664   int        ierr;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1668   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1669   PetscFunctionReturn(0);
1670 }
1671 EXTERN_C_END
1672 
1673 EXTERN_C_BEGIN
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1676 int MatRetrieveValues_MPIAIJ(Mat mat)
1677 {
1678   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1679   int        ierr;
1680 
1681   PetscFunctionBegin;
1682   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1683   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1684   PetscFunctionReturn(0);
1685 }
1686 EXTERN_C_END
1687 
1688 #include "petscpc.h"
1689 EXTERN_C_BEGIN
1690 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1691 EXTERN int MatDiagonalScaleLocal_MPIAIJ(Mat,Vec);
1692 EXTERN_C_END
1693 
1694 EXTERN_C_BEGIN
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1697 int MatMPIAIJSetPreallocation_MPIAIJ(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1698 {
1699   Mat_MPIAIJ   *b;
1700   int          ierr,i;
1701 
1702   PetscFunctionBegin;
1703   B->preallocated = PETSC_TRUE;
1704   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1705   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1706   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1707   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1708   if (d_nnz) {
1709     for (i=0; i<B->m; i++) {
1710       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1711     }
1712   }
1713   if (o_nnz) {
1714     for (i=0; i<B->m; i++) {
1715       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1716     }
1717   }
1718   b = (Mat_MPIAIJ*)B->data;
1719 
1720   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1721   PetscLogObjectParent(B,b->A);
1722   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1723   PetscLogObjectParent(B,b->B);
1724 
1725   PetscFunctionReturn(0);
1726 }
1727 EXTERN_C_END
1728 
1729 EXTERN_C_BEGIN
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatCreate_MPIAIJ"
1732 int MatCreate_MPIAIJ(Mat B)
1733 {
1734   Mat_MPIAIJ *b;
1735   int        ierr,i,size;
1736 
1737   PetscFunctionBegin;
1738   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1739 
1740   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1741   B->data         = (void*)b;
1742   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1743   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1744   B->factor       = 0;
1745   B->assembled    = PETSC_FALSE;
1746   B->mapping      = 0;
1747 
1748   B->insertmode      = NOT_SET_VALUES;
1749   b->size            = size;
1750   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1751 
1752   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1753   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1754 
1755   /* the information in the maps duplicates the information computed below, eventually
1756      we should remove the duplicate information that is not contained in the maps */
1757   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1758   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1759 
1760   /* build local table of row and column ownerships */
1761   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1762   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1763   b->cowners = b->rowners + b->size + 2;
1764   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1765   b->rowners[0] = 0;
1766   for (i=2; i<=b->size; i++) {
1767     b->rowners[i] += b->rowners[i-1];
1768   }
1769   b->rstart = b->rowners[b->rank];
1770   b->rend   = b->rowners[b->rank+1];
1771   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1772   b->cowners[0] = 0;
1773   for (i=2; i<=b->size; i++) {
1774     b->cowners[i] += b->cowners[i-1];
1775   }
1776   b->cstart = b->cowners[b->rank];
1777   b->cend   = b->cowners[b->rank+1];
1778 
1779   /* build cache for off array entries formed */
1780   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1781   b->donotstash  = PETSC_FALSE;
1782   b->colmap      = 0;
1783   b->garray      = 0;
1784   b->roworiented = PETSC_TRUE;
1785 
1786   /* stuff used for matrix vector multiply */
1787   b->lvec      = PETSC_NULL;
1788   b->Mvctx     = PETSC_NULL;
1789 
1790   /* stuff for MatGetRow() */
1791   b->rowindices   = 0;
1792   b->rowvalues    = 0;
1793   b->getrowactive = PETSC_FALSE;
1794 
1795   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1796                                      "MatStoreValues_MPIAIJ",
1797                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1798   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1799                                      "MatRetrieveValues_MPIAIJ",
1800                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1801   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1802 				     "MatGetDiagonalBlock_MPIAIJ",
1803                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1804   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1805 				     "MatIsSymmetric_MPIAIJ",
1806 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1807   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
1808 				     "MatMPIAIJSetPreallocation_MPIAIJ",
1809 				     MatMPIAIJSetPreallocation_MPIAIJ); CHKERRQ(ierr);
1810   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
1811 				     "MatDiagonalScaleLocal_MPIAIJ",
1812 				     MatDiagonalScaleLocal_MPIAIJ); CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 EXTERN_C_END
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1819 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1820 {
1821   Mat        mat;
1822   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1823   int        ierr;
1824 
1825   PetscFunctionBegin;
1826   *newmat       = 0;
1827   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1828   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1829   a    = (Mat_MPIAIJ*)mat->data;
1830   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1831   mat->factor       = matin->factor;
1832   mat->assembled    = PETSC_TRUE;
1833   mat->insertmode   = NOT_SET_VALUES;
1834   mat->preallocated = PETSC_TRUE;
1835 
1836   a->rstart       = oldmat->rstart;
1837   a->rend         = oldmat->rend;
1838   a->cstart       = oldmat->cstart;
1839   a->cend         = oldmat->cend;
1840   a->size         = oldmat->size;
1841   a->rank         = oldmat->rank;
1842   a->donotstash   = oldmat->donotstash;
1843   a->roworiented  = oldmat->roworiented;
1844   a->rowindices   = 0;
1845   a->rowvalues    = 0;
1846   a->getrowactive = PETSC_FALSE;
1847 
1848   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1849   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1850   if (oldmat->colmap) {
1851 #if defined (PETSC_USE_CTABLE)
1852     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1853 #else
1854     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1855     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1856     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1857 #endif
1858   } else a->colmap = 0;
1859   if (oldmat->garray) {
1860     int len;
1861     len  = oldmat->B->n;
1862     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1863     PetscLogObjectMemory(mat,len*sizeof(int));
1864     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1865   } else a->garray = 0;
1866 
1867   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1868   PetscLogObjectParent(mat,a->lvec);
1869   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1870   PetscLogObjectParent(mat,a->Mvctx);
1871   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1872   PetscLogObjectParent(mat,a->A);
1873   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1874   PetscLogObjectParent(mat,a->B);
1875   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1876   *newmat = mat;
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #include "petscsys.h"
1881 
1882 EXTERN_C_BEGIN
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatLoad_MPIAIJ"
1885 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1886 {
1887   Mat          A;
1888   PetscScalar  *vals,*svals;
1889   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1890   MPI_Status   status;
1891   int          i,nz,ierr,j,rstart,rend,fd;
1892   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1893   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1894   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1895 
1896   PetscFunctionBegin;
1897   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1898   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1899   if (!rank) {
1900     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1901     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1902     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1903     if (header[3] < 0) {
1904       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1905     }
1906   }
1907 
1908   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1909   M = header[1]; N = header[2];
1910   /* determine ownership of all rows */
1911   m = M/size + ((M % size) > rank);
1912   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1913   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1914   rowners[0] = 0;
1915   for (i=2; i<=size; i++) {
1916     rowners[i] += rowners[i-1];
1917   }
1918   rstart = rowners[rank];
1919   rend   = rowners[rank+1];
1920 
1921   /* distribute row lengths to all processors */
1922   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1923   offlens = ourlens + (rend-rstart);
1924   if (!rank) {
1925     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1926     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1927     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1928     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1929     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1930     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1931   } else {
1932     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1933   }
1934 
1935   if (!rank) {
1936     /* calculate the number of nonzeros on each processor */
1937     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1938     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1939     for (i=0; i<size; i++) {
1940       for (j=rowners[i]; j< rowners[i+1]; j++) {
1941         procsnz[i] += rowlengths[j];
1942       }
1943     }
1944     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1945 
1946     /* determine max buffer needed and allocate it */
1947     maxnz = 0;
1948     for (i=0; i<size; i++) {
1949       maxnz = PetscMax(maxnz,procsnz[i]);
1950     }
1951     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1952 
1953     /* read in my part of the matrix column indices  */
1954     nz   = procsnz[0];
1955     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1956     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1957 
1958     /* read in everyone else's column indices and ship them off */
1959     for (i=1; i<size; i++) {
1960       nz   = procsnz[i];
1961       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1962       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1963     }
1964     ierr = PetscFree(cols);CHKERRQ(ierr);
1965   } else {
1966     /* determine buffer space needed for message */
1967     nz = 0;
1968     for (i=0; i<m; i++) {
1969       nz += ourlens[i];
1970     }
1971     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1972 
1973     /* receive message of column indices */
1974     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1975     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1976     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1977   }
1978 
1979   /* determine column ownership if matrix is not square */
1980   if (N != M) {
1981     n      = N/size + ((N % size) > rank);
1982     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1983     cstart = cend - n;
1984   } else {
1985     cstart = rstart;
1986     cend   = rend;
1987     n      = cend - cstart;
1988   }
1989 
1990   /* loop over local rows, determining number of off diagonal entries */
1991   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1992   jj = 0;
1993   for (i=0; i<m; i++) {
1994     for (j=0; j<ourlens[i]; j++) {
1995       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1996       jj++;
1997     }
1998   }
1999 
2000   /* create our matrix */
2001   for (i=0; i<m; i++) {
2002     ourlens[i] -= offlens[i];
2003   }
2004   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
2005   ierr = MatSetType(A,type);CHKERRQ(ierr);
2006   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
2007 
2008   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2009   for (i=0; i<m; i++) {
2010     ourlens[i] += offlens[i];
2011   }
2012 
2013   if (!rank) {
2014     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2015 
2016     /* read in my part of the matrix numerical values  */
2017     nz   = procsnz[0];
2018     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2019 
2020     /* insert into matrix */
2021     jj      = rstart;
2022     smycols = mycols;
2023     svals   = vals;
2024     for (i=0; i<m; i++) {
2025       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2026       smycols += ourlens[i];
2027       svals   += ourlens[i];
2028       jj++;
2029     }
2030 
2031     /* read in the other processors' values and ship them out */
2032     for (i=1; i<size; i++) {
2033       nz   = procsnz[i];
2034       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2035       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2036     }
2037     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2038   } else {
2039     /* receive numeric values */
2040     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2041 
2042     /* receive message of values */
2043     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2044     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2045     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2046 
2047     /* insert into matrix */
2048     jj      = rstart;
2049     smycols = mycols;
2050     svals   = vals;
2051     for (i=0; i<m; i++) {
2052       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2053       smycols += ourlens[i];
2054       svals   += ourlens[i];
2055       jj++;
2056     }
2057   }
2058   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2059   ierr = PetscFree(vals);CHKERRQ(ierr);
2060   ierr = PetscFree(mycols);CHKERRQ(ierr);
2061   ierr = PetscFree(rowners);CHKERRQ(ierr);
2062 
2063   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2064   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2065   *newmat = A;
2066   PetscFunctionReturn(0);
2067 }
2068 EXTERN_C_END
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2072 /*
2073     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2074   matrix, and then the end result formed by concatenating the local matrices.
2075   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2076 */
2077 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2078 {
2079   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2080   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2081   Mat          *local,M,Mreuse;
2082   PetscScalar  *vwork,*aa;
2083   MPI_Comm     comm = mat->comm;
2084   Mat_SeqAIJ   *aij;
2085 
2086 
2087   PetscFunctionBegin;
2088   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2089   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2090 
2091   if (call ==  MAT_REUSE_MATRIX) {
2092     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2093     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2094     local = &Mreuse;
2095     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2096   } else {
2097     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2098     Mreuse = *local;
2099     ierr   = PetscFree(local);CHKERRQ(ierr);
2100   }
2101 
2102   /*
2103       m - number of local rows
2104       n - number of columns (same on all processors)
2105       rstart - first row in new global matrix generated
2106   */
2107   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2108   if (call == MAT_INITIAL_MATRIX) {
2109     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2110     ii  = aij->i;
2111     jj  = aij->j;
2112 
2113     /*
2114         Determine the number of non-zeros in the diagonal and off-diagonal
2115         portions of the matrix in order to do correct preallocation
2116     */
2117 
2118     /* first get start and end of "diagonal" columns */
2119     if (csize == PETSC_DECIDE) {
2120       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2121       if (mglobal == n) { /* square matrix */
2122 	nlocal = m;
2123       } else {
2124         nlocal = n/size + ((n % size) > rank);
2125       }
2126     } else {
2127       nlocal = csize;
2128     }
2129     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2130     rstart = rend - nlocal;
2131     if (rank == size - 1 && rend != n) {
2132       SETERRQ2(1,"Local column sizes %d do not add up to total number of columns %d",rend,n);
2133     }
2134 
2135     /* next, compute all the lengths */
2136     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2137     olens = dlens + m;
2138     for (i=0; i<m; i++) {
2139       jend = ii[i+1] - ii[i];
2140       olen = 0;
2141       dlen = 0;
2142       for (j=0; j<jend; j++) {
2143         if (*jj < rstart || *jj >= rend) olen++;
2144         else dlen++;
2145         jj++;
2146       }
2147       olens[i] = olen;
2148       dlens[i] = dlen;
2149     }
2150     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2151     ierr = PetscFree(dlens);CHKERRQ(ierr);
2152   } else {
2153     int ml,nl;
2154 
2155     M = *newmat;
2156     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2157     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2158     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2159     /*
2160          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2161        rather than the slower MatSetValues().
2162     */
2163     M->was_assembled = PETSC_TRUE;
2164     M->assembled     = PETSC_FALSE;
2165   }
2166   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2167   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2168   ii  = aij->i;
2169   jj  = aij->j;
2170   aa  = aij->a;
2171   for (i=0; i<m; i++) {
2172     row   = rstart + i;
2173     nz    = ii[i+1] - ii[i];
2174     cwork = jj;     jj += nz;
2175     vwork = aa;     aa += nz;
2176     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2177   }
2178 
2179   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2180   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2181   *newmat = M;
2182 
2183   /* save submatrix used in processor for next request */
2184   if (call ==  MAT_INITIAL_MATRIX) {
2185     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2186     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2187   }
2188 
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2194 /*@C
2195    MatMPIAIJSetPreallocation - Creates a sparse parallel matrix in AIJ format
2196    (the default parallel PETSc format).  For good matrix assembly performance
2197    the user should preallocate the matrix storage by setting the parameters
2198    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2199    performance can be increased by more than a factor of 50.
2200 
2201    Collective on MPI_Comm
2202 
2203    Input Parameters:
2204 +  A - the matrix
2205 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2206            (same value is used for all local rows)
2207 .  d_nnz - array containing the number of nonzeros in the various rows of the
2208            DIAGONAL portion of the local submatrix (possibly different for each row)
2209            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2210            The size of this array is equal to the number of local rows, i.e. 'm'.
2211            You must leave room for the diagonal entry even if it is zero.
2212 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2213            submatrix (same value is used for all local rows).
2214 -  o_nnz - array containing the number of nonzeros in the various rows of the
2215            OFF-DIAGONAL portion of the local submatrix (possibly different for
2216            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2217            structure. The size of this array is equal to the number
2218            of local rows, i.e. 'm'.
2219 
2220    The AIJ format (also called the Yale sparse matrix format or
2221    compressed row storage), is fully compatible with standard Fortran 77
2222    storage.  That is, the stored row and column indices can begin at
2223    either one (as in Fortran) or zero.  See the users manual for details.
2224 
2225    The user MUST specify either the local or global matrix dimensions
2226    (possibly both).
2227 
2228    The parallel matrix is partitioned such that the first m0 rows belong to
2229    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2230    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2231 
2232    The DIAGONAL portion of the local submatrix of a processor can be defined
2233    as the submatrix which is obtained by extracting the part corresponding
2234    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2235    first row that belongs to the processor, and r2 is the last row belonging
2236    to this processor. This is a square mxm matrix. The remaining portion
2237    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2238 
2239    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2240 
2241    By default, this format uses inodes (identical nodes) when possible.
2242    We search for consecutive rows with the same nonzero structure, thereby
2243    reusing matrix information to achieve increased efficiency.
2244 
2245    Options Database Keys:
2246 +  -mat_aij_no_inode  - Do not use inodes
2247 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2248 -  -mat_aij_oneindex - Internally use indexing starting at 1
2249         rather than 0.  Note that when calling MatSetValues(),
2250         the user still MUST index entries starting at 0!
2251 
2252    Example usage:
2253 
2254    Consider the following 8x8 matrix with 34 non-zero values, that is
2255    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2256    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2257    as follows:
2258 
2259 .vb
2260             1  2  0  |  0  3  0  |  0  4
2261     Proc0   0  5  6  |  7  0  0  |  8  0
2262             9  0 10  | 11  0  0  | 12  0
2263     -------------------------------------
2264            13  0 14  | 15 16 17  |  0  0
2265     Proc1   0 18  0  | 19 20 21  |  0  0
2266             0  0  0  | 22 23  0  | 24  0
2267     -------------------------------------
2268     Proc2  25 26 27  |  0  0 28  | 29  0
2269            30  0  0  | 31 32 33  |  0 34
2270 .ve
2271 
2272    This can be represented as a collection of submatrices as:
2273 
2274 .vb
2275       A B C
2276       D E F
2277       G H I
2278 .ve
2279 
2280    Where the submatrices A,B,C are owned by proc0, D,E,F are
2281    owned by proc1, G,H,I are owned by proc2.
2282 
2283    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2284    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2285    The 'M','N' parameters are 8,8, and have the same values on all procs.
2286 
2287    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2288    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2289    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2290    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2291    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2292    matrix, and [DF] as another SeqAIJ matrix.
2293 
2294    When d_nz, o_nz parameters are specified, d_nz storage elements are
2295    allocated for every row of the local diagonal submatrix, and o_nz
2296    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2297    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2298    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2299    In this case, the values of d_nz,o_nz are:
2300 .vb
2301      proc0 : dnz = 2, o_nz = 2
2302      proc1 : dnz = 3, o_nz = 2
2303      proc2 : dnz = 1, o_nz = 4
2304 .ve
2305    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2306    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2307    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2308    34 values.
2309 
2310    When d_nnz, o_nnz parameters are specified, the storage is specified
2311    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2312    In the above case the values for d_nnz,o_nnz are:
2313 .vb
2314      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2315      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2316      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2317 .ve
2318    Here the space allocated is the sum of all the above values, i.e. 34, and
2319    hence the preallocation is perfect.
2320 
2321    Level: intermediate
2322 
2323 .keywords: matrix, aij, compressed row, sparse, parallel
2324 
2325 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2326 @*/
2327 int MatMPIAIJSetPreallocation(Mat B,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2328 {
2329   int ierr,(*f)(Mat,int,const int[],int,const int[]);
2330 
2331   PetscFunctionBegin;
2332   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2333   if (f) {
2334     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2335   }
2336   PetscFunctionReturn(0);
2337 }
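
/*
   Illustrative sketch (not part of the PETSc library source): one possible
   MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() sequence for the 8x8
   example described in the manual page above.  The scalar counts d_nz=3 and
   o_nz=4 are the per-process maxima quoted there; the routine name is a
   placeholder and error handling is abbreviated.
*/
#if 0
#include "petscmat.h"

int BuildExampleMPIAIJ(MPI_Comm comm,Mat *A)
{
  int ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,8,8,A);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
  /* at most 3 nonzeros per row in the diagonal block, 4 in the off-diagonal block */
  ierr = MatMPIAIJSetPreallocation(*A,3,PETSC_NULL,4,PETSC_NULL);CHKERRQ(ierr);
  /* ... each process calls MatSetValues() for the rows it owns ... */
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif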
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatCreateMPIAIJ"
2341 /*@C
2342    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2343    (the default parallel PETSc format).  For good matrix assembly performance
2344    the user should preallocate the matrix storage by setting the parameters
2345    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2346    performance can be increased by more than a factor of 50.
2347 
2348    Collective on MPI_Comm
2349 
2350    Input Parameters:
2351 +  comm - MPI communicator
2352 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2353            This value should be the same as the local size used in creating the
2354            y vector for the matrix-vector product y = Ax.
2355 .  n - This value should be the same as the local size used in creating the
2356        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2357        calculated if N is given) For square matrices n is almost always m.
2358 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2359 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2360 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2361            (same value is used for all local rows)
2362 .  d_nnz - array containing the number of nonzeros in the various rows of the
2363            DIAGONAL portion of the local submatrix (possibly different for each row)
2364            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2365            The size of this array is equal to the number of local rows, i.e. 'm'.
2366            You must leave room for the diagonal entry even if it is zero.
2367 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2368            submatrix (same value is used for all local rows).
2369 -  o_nnz - array containing the number of nonzeros in the various rows of the
2370            OFF-DIAGONAL portion of the local submatrix (possibly different for
2371            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2372            structure. The size of this array is equal to the number
2373            of local rows, i.e. 'm'.
2374 
2375    Output Parameter:
2376 .  A - the matrix
2377 
2378    Notes:
2379    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2380    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2381    storage requirements for this matrix.
2382 
2383    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
2384    processor then it must be used on all processors that share the object for
2385    that argument.
2386 
2387    The AIJ format (also called the Yale sparse matrix format or
2388    compressed row storage), is fully compatible with standard Fortran 77
2389    storage.  That is, the stored row and column indices can begin at
2390    either one (as in Fortran) or zero.  See the users manual for details.
2391 
2392    The user MUST specify either the local or global matrix dimensions
2393    (possibly both).
2394 
2395    The parallel matrix is partitioned such that the first m0 rows belong to
2396    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2397    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2398 
2399    The DIAGONAL portion of the local submatrix of a processor can be defined
2400    as the submatrix which is obtained by extracting the part corresponding
2401    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2402    first row that belongs to the processor, and r2 is the last row belonging
2403    to this processor. This is a square mxm matrix. The remaining portion
2404    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2405 
2406    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2407 
2408    When calling this routine with a single process communicator, a matrix of
2409    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2410    type of communicator, use the construction mechanism:
2411      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2412 
2413    By default, this format uses inodes (identical nodes) when possible.
2414    We search for consecutive rows with the same nonzero structure, thereby
2415    reusing matrix information to achieve increased efficiency.
2416 
2417    Options Database Keys:
2418 +  -mat_aij_no_inode  - Do not use inodes
2419 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2420 -  -mat_aij_oneindex - Internally use indexing starting at 1
2421         rather than 0.  Note that when calling MatSetValues(),
2422         the user still MUST index entries starting at 0!
2423 
2424 
2425    Example usage:
2426 
2427    Consider the following 8x8 matrix with 34 non-zero values, that is
2428    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2429    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2430    as follows:
2431 
2432 .vb
2433             1  2  0  |  0  3  0  |  0  4
2434     Proc0   0  5  6  |  7  0  0  |  8  0
2435             9  0 10  | 11  0  0  | 12  0
2436     -------------------------------------
2437            13  0 14  | 15 16 17  |  0  0
2438     Proc1   0 18  0  | 19 20 21  |  0  0
2439             0  0  0  | 22 23  0  | 24  0
2440     -------------------------------------
2441     Proc2  25 26 27  |  0  0 28  | 29  0
2442            30  0  0  | 31 32 33  |  0 34
2443 .ve
2444 
2445    This can be represented as a collection of submatrices as:
2446 
2447 .vb
2448       A B C
2449       D E F
2450       G H I
2451 .ve
2452 
2453    Where the submatrices A,B,C are owned by proc0, D,E,F are
2454    owned by proc1, G,H,I are owned by proc2.
2455 
2456    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2457    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2458    The 'M','N' parameters are 8,8, and have the same values on all procs.
2459 
2460    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2461    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2462    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2463    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2464    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2465    matrix, and [DF] as another SeqAIJ matrix.
2466 
2467    When d_nz, o_nz parameters are specified, d_nz storage elements are
2468    allocated for every row of the local diagonal submatrix, and o_nz
2469    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2470    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2471    local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2472    In this case, the values of d_nz,o_nz are:
2473 .vb
2474      proc0 : dnz = 2, o_nz = 2
2475      proc1 : dnz = 3, o_nz = 2
2476      proc2 : dnz = 1, o_nz = 4
2477 .ve
2478    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2479    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
2480    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2481    34 values.
2482 
2483    When d_nnz, o_nnz parameters are specified, the storage is specified
2484    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2485    In the above case the values for d_nnz,o_nnz are:
2486 .vb
2487      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2488      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2489      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2490 .ve
2491    Here the space allocated is the sum of all the above values, i.e. 34, and
2492    hence the preallocation is perfect.
2493 
2494    Level: intermediate
2495 
2496 .keywords: matrix, aij, compressed row, sparse, parallel
2497 
2498 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2499 @*/
2500 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2501 {
2502   int ierr,size;
2503 
2504   PetscFunctionBegin;
2505   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2506   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2507   if (size > 1) {
2508     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2509     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2510   } else {
2511     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2512     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2513   }
2514   PetscFunctionReturn(0);
2515 }
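
/*
   Illustrative sketch (not part of the PETSc library source): creating the 8x8
   example matrix from the manual page above on exactly 3 processes, using the
   per-row d_nnz/o_nnz counts listed there.  The routine name is a placeholder
   and error handling is abbreviated.
*/
#if 0
#include "petscmat.h"

int CreateExample8x8(MPI_Comm comm,Mat *A)
{
  int ierr,rank;
  int d_nnz0[3] = {2,2,2},o_nnz0[3] = {2,2,2};  /* proc0: rows 0-2 */
  int d_nnz1[3] = {3,3,2},o_nnz1[3] = {2,1,1};  /* proc1: rows 3-5 */
  int d_nnz2[2] = {1,1},  o_nnz2[2] = {4,4};    /* proc2: rows 6-7 */

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (rank == 0) {
    ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz0,0,o_nnz0,A);CHKERRQ(ierr);
  } else if (rank == 1) {
    ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz1,0,o_nnz1,A);CHKERRQ(ierr);
  } else {
    ierr = MatCreateMPIAIJ(comm,2,2,8,8,0,d_nnz2,0,o_nnz2,A);CHKERRQ(ierr);
  }
  /* each process now inserts the rows it owns with MatSetValues(), then assembles */
  ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif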
2516 
2517 #undef __FUNCT__
2518 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2519 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2520 {
2521   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2522   PetscFunctionBegin;
2523   *Ad     = a->A;
2524   *Ao     = a->B;
2525   *colmap = a->garray;
2526   PetscFunctionReturn(0);
2527 }
2528 
2529 #undef __FUNCT__
2530 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2531 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2532 {
2533   int        ierr,i;
2534   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2535 
2536   PetscFunctionBegin;
2537   if (coloring->ctype == IS_COLORING_LOCAL) {
2538     ISColoringValue *allcolors,*colors;
2539     ISColoring      ocoloring;
2540 
2541     /* set coloring for diagonal portion */
2542     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2543 
2544     /* set coloring for off-diagonal portion */
2545     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2546     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2547     for (i=0; i<a->B->n; i++) {
2548       colors[i] = allcolors[a->garray[i]];
2549     }
2550     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2551     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2552     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2553     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2554   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2555     ISColoringValue *colors;
2556     int             *larray;
2557     ISColoring      ocoloring;
2558 
2559     /* set coloring for diagonal portion */
2560     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2561     for (i=0; i<a->A->n; i++) {
2562       larray[i] = i + a->cstart;
2563     }
2564     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2565     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2566     for (i=0; i<a->A->n; i++) {
2567       colors[i] = coloring->colors[larray[i]];
2568     }
2569     ierr = PetscFree(larray);CHKERRQ(ierr);
2570     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2571     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2572     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2573 
2574     /* set coloring for off-diagonal portion */
2575     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2576     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2577     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2578     for (i=0; i<a->B->n; i++) {
2579       colors[i] = coloring->colors[larray[i]];
2580     }
2581     ierr = PetscFree(larray);CHKERRQ(ierr);
2582     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2583     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2584     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2585   } else {
2586     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2587   }
2588 
2589   PetscFunctionReturn(0);
2590 }
2591 
2592 #undef __FUNCT__
2593 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2594 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2595 {
2596   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2597   int        ierr;
2598 
2599   PetscFunctionBegin;
2600   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2601   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 #undef __FUNCT__
2606 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2607 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2608 {
2609   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2610   int        ierr;
2611 
2612   PetscFunctionBegin;
2613   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2614   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2615   PetscFunctionReturn(0);
2616 }
2617 
2618 #undef __FUNCT__
2619 #define __FUNCT__ "MatMerge"
2620 /*@C
2621       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2622                  matrices from each processor
2623 
2624     Collective on MPI_Comm
2625 
2626    Input Parameters:
2627 +    comm - the communicator the parallel matrix will live on
2628 -    inmat - the input sequential matrices
2629 
2630    Output Parameter:
2631 .    outmat - the parallel matrix generated
2632 
2633     Level: advanced
2634 
2635    Notes: The number of columns of the sequential matrix on EACH of the separate
2636       processors MUST be the same.  The input matrix inmat is destroyed by this routine.
2637 
2638 @*/
2639 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2640 {
2641   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2642   PetscScalar *values;
2643   PetscMap    columnmap,rowmap;
2644 
2645   PetscFunctionBegin;
2646 
2647   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2648 
2649   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2650   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2651   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2652   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2653   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2654   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2655 
2656   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2657   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2658   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2659   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2660   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2661 
2662   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2663   for (i=0;i<m;i++) {
2664     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2665     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2666     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2667   }
2668   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2669   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2670 
2671   for (i=0;i<m;i++) {
2672     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2673     I    = i + rstart;
2674     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2675     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2676   }
2677   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2678   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2679   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2680 
2681   PetscFunctionReturn(0);
2682 }
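
/*
   Illustrative sketch (not part of the PETSc library source): each process
   builds a small sequential matrix with the same number of columns, and
   MatMerge() stacks them, row block by row block, into one parallel matrix.
   The sizes and routine name are placeholders; note that MatMerge() destroys
   its input matrix, so it is not destroyed again here.
*/
#if 0
#include "petscmat.h"

int MergeExample(MPI_Comm comm,Mat *C)
{
  int         ierr,i,col[2];
  PetscScalar v[2];
  Mat         local;

  PetscFunctionBegin;
  /* two local rows, ten columns (the column count must match on all processes) */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,2,10,2,PETSC_NULL,&local);CHKERRQ(ierr);
  for (i=0; i<2; i++) {
    col[0] = i;   col[1] = i+1;
    v[0]   = 1.0; v[1]   = -1.0;
    ierr = MatSetValues(local,1,&i,2,col,v,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(local,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(local,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatMerge(comm,local,C);CHKERRQ(ierr);  /* local is destroyed inside MatMerge() */
  PetscFunctionReturn(0);
}
#endif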
2683 
2684 #undef __FUNCT__
2685 #define __FUNCT__ "MatFileSplit"
2686 int MatFileSplit(Mat A,char *outfile)
2687 {
2688   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2689   PetscViewer out;
2690   char        *name;
2691   Mat         B;
2692   PetscScalar *values;
2693 
2694   PetscFunctionBegin;
2695 
2696   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2697   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2698   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2699   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2700   for (i=0;i<m;i++) {
2701     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2702     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2703     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2704   }
2705   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2706   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2707 
2708   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2709   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2710   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2711   sprintf(name,"%s.%d",outfile,rank);
2712   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2713   ierr = PetscFree(name);CHKERRQ(ierr);
2714   ierr = MatView(B,out);CHKERRQ(ierr);
2715   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2716   ierr = MatDestroy(B);CHKERRQ(ierr);
2717   PetscFunctionReturn(0);
2718 }
2719