xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision cd0d46ebc1dba9f5ba3de7cf2d3eee7f6fd25de7)
1 /*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/vec/vecimpl.h"
5 #include "src/inline/spops.h"
6 
7 EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
8 EXTERN int DisAssemble_MPIAIJ(Mat);
9 EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
10 EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
11 EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
12 EXTERN int MatPrintHelp_SeqAIJ(Mat);
13 
14 /*
15   Local utility routine that creates a mapping from the global column
16 number to the local number in the off-diagonal part of the local
17 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
18 a slightly higher hash-table lookup cost; without it, it is not scalable (each
19 processor holds an order-N integer array), but access is fast.
20 */
21 #undef __FUNCT__
22 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
23 int CreateColmap_MPIAIJ_Private(Mat mat)
24 {
25   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
26   int        n = aij->B->n,i,ierr;
27 
28   PetscFunctionBegin;
29 #if defined (PETSC_USE_CTABLE)
30   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
31   for (i=0; i<n; i++){
32     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
33   }
34 #else
35   ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
36   PetscLogObjectMemory(mat,mat->N*sizeof(int));
37   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
38   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
39 #endif
40   PetscFunctionReturn(0);
41 }
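/*
   Illustrative sketch (not compiled in; the helper name is hypothetical): once the
   colmap exists, a global column number gcol is translated to a local column index
   in the off-diagonal block B exactly as MatSetValues_MPIAIJ() does below; a result
   of -1 means that column is not present in B on this processor.
*/
#if 0
static int ColmapLookupExample(Mat_MPIAIJ *aij,int gcol,int *lcol)
{
  int ierr;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                      /* table stores local index + 1; 0 means not found */
#else
  *lcol = aij->colmap[gcol] - 1;  /* array stores local index + 1; 0 means not found */
#endif
  return 0;
}
#endif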
42 
43 #define CHUNKSIZE   15
44 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
45 { \
46  \
47     rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
48     rmax = aimax[row]; nrow = ailen[row];  \
49     col1 = col - shift; \
50      \
51     low = 0; high = nrow; \
52     while (high-low > 5) { \
53       t = (low+high)/2; \
54       if (rp[t] > col1) high = t; \
55       else             low  = t; \
56     } \
57       for (_i=low; _i<high; _i++) { \
58         if (rp[_i] > col1) break; \
59         if (rp[_i] == col1) { \
60           if (addv == ADD_VALUES) ap[_i] += value;   \
61           else                  ap[_i] = value; \
62           goto a_noinsert; \
63         } \
64       }  \
65       if (nonew == 1) goto a_noinsert; \
66       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
67       if (nrow >= rmax) { \
68         /* there is no extra room in row, therefore enlarge */ \
69         int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
70         PetscScalar *new_a; \
71  \
72         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
73  \
74         /* malloc new storage space */ \
75         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
76         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
77         new_j   = (int*)(new_a + new_nz); \
78         new_i   = new_j + new_nz; \
79  \
80         /* copy over old data into new slots */ \
81         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
82         for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
83         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
84         len = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
85         ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
86                                                            len*sizeof(int));CHKERRQ(ierr); \
87         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
88         ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
89                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
90         /* free up old matrix storage */ \
91  \
92         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
93         if (!a->singlemalloc) { \
94            ierr = PetscFree(a->i);CHKERRQ(ierr); \
95            ierr = PetscFree(a->j);CHKERRQ(ierr); \
96         } \
97         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
98         a->singlemalloc = PETSC_TRUE; \
99  \
100         rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
101         rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
102         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
103         a->maxnz += CHUNKSIZE; \
104         a->reallocs++; \
105       } \
106       N = nrow++ - 1; a->nz++; \
107       /* shift up all the later entries in this row */ \
108       for (ii=N; ii>=_i; ii--) { \
109         rp[ii+1] = rp[ii]; \
110         ap[ii+1] = ap[ii]; \
111       } \
112       rp[_i] = col1;  \
113       ap[_i] = value;  \
114       a_noinsert: ; \
115       ailen[row] = nrow; \
116 }
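/*
   Summary of the insertion logic in the two macros (A-block version above, B-block
   version below): rp/ap point at the column indices and values of the target row;
   a binary search narrows the window to at most 5 entries, then a linear scan either
   finds col1 (overwriting or adding according to addv) or finds the insertion point.
   When the row is already full (nrow == rmax) the entire i/j/a storage is reallocated
   with CHUNKSIZE extra slots for this row, so correct preallocation avoids this
   expensive copy.
*/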
117 
118 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
119 { \
120  \
121     rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
122     rmax = bimax[row]; nrow = bilen[row];  \
123     col1 = col - shift; \
124      \
125     low = 0; high = nrow; \
126     while (high-low > 5) { \
127       t = (low+high)/2; \
128       if (rp[t] > col1) high = t; \
129       else             low  = t; \
130     } \
131        for (_i=low; _i<high; _i++) { \
132         if (rp[_i] > col1) break; \
133         if (rp[_i] == col1) { \
134           if (addv == ADD_VALUES) ap[_i] += value;   \
135           else                  ap[_i] = value; \
136           goto b_noinsert; \
137         } \
138       }  \
139       if (nonew == 1) goto b_noinsert; \
140       else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
141       if (nrow >= rmax) { \
142         /* there is no extra room in row, therefore enlarge */ \
143         int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
144         PetscScalar *new_a; \
145  \
146         if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
147  \
148         /* malloc new storage space */ \
149         len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
150         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
151         new_j   = (int*)(new_a + new_nz); \
152         new_i   = new_j + new_nz; \
153  \
154         /* copy over old data into new slots */ \
155         for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
156         for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
157         ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
158         len = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
159         ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
160                                                            len*sizeof(int));CHKERRQ(ierr); \
161         ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
162         ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
163                                                            len*sizeof(PetscScalar));CHKERRQ(ierr);  \
164         /* free up old matrix storage */ \
165  \
166         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
167         if (!b->singlemalloc) { \
168           ierr = PetscFree(b->i);CHKERRQ(ierr); \
169           ierr = PetscFree(b->j);CHKERRQ(ierr); \
170         } \
171         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
172         b->singlemalloc = PETSC_TRUE; \
173  \
174         rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
175         rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
176         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
177         b->maxnz += CHUNKSIZE; \
178         b->reallocs++; \
179       } \
180       N = nrow++ - 1; b->nz++; \
181       /* shift up all the later entries in this row */ \
182       for (ii=N; ii>=_i; ii--) { \
183         rp[ii+1] = rp[ii]; \
184         ap[ii+1] = ap[ii]; \
185       } \
186       rp[_i] = col1;  \
187       ap[_i] = value;  \
188       b_noinsert: ; \
189       bilen[row] = nrow; \
190 }
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatSetValues_MPIAIJ"
194 int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
195 {
196   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
197   PetscScalar  value;
198   int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
199   int          cstart = aij->cstart,cend = aij->cend,row,col;
200   PetscTruth   roworiented = aij->roworiented;
201 
202   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
203   Mat          A = aij->A;
204   Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
205   int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
206   PetscScalar  *aa = a->a;
207   PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
208   Mat          B = aij->B;
209   Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
210   int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
211   PetscScalar  *ba = b->a;
212 
213   int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
214   int          nonew = a->nonew,shift = a->indexshift;
215   PetscScalar  *ap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_BOPT_g)
221     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
222 #endif
223     if (im[i] >= rstart && im[i] < rend) {
224       row = im[i] - rstart;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart && in[j] < cend){
227           col = in[j] - cstart;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           if (ignorezeroentries && value == 0.0) continue;
230           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_BOPT_g)
234         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!aij->colmap) {
239               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
243 	    col--;
244 #else
245             col = aij->colmap[in[j]] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
248               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
251               B = aij->B;
252               b = (Mat_SeqAIJ*)B->data;
253               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
254               ba = b->a;
255             }
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           if (ignorezeroentries && value == 0.0) continue;
259           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!aij->donotstash) {
265         if (roworiented) {
266           if (ignorezeroentries && v[i*n] == 0.0) continue;
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
268         } else {
269           if (ignorezeroentries && v[i] == 0.0) continue;
270           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
271         }
272       }
273     }
274   }
275   PetscFunctionReturn(0);
276 }
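/*
   Minimal usage sketch (not compiled in; indices are illustrative).  Entries whose
   rows are owned locally are placed directly into aij->A or aij->B by the code above;
   entries for rows owned elsewhere are stashed and communicated during assembly.
*/
#if 0
{
  int         row = 0,col = 0,ierr;   /* global row and column numbers */
  PetscScalar one = 1.0;
  ierr = MatSetValues(mat,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
}
#endif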
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatGetValues_MPIAIJ"
280 int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
281 {
282   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
283   int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
284   int        cstart = aij->cstart,cend = aij->cend,row,col;
285 
286   PetscFunctionBegin;
287   for (i=0; i<m; i++) {
288     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
289     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
290     if (idxm[i] >= rstart && idxm[i] < rend) {
291       row = idxm[i] - rstart;
292       for (j=0; j<n; j++) {
293         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
294         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
295         if (idxn[j] >= cstart && idxn[j] < cend){
296           col = idxn[j] - cstart;
297           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
298         } else {
299           if (!aij->colmap) {
300             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
301           }
302 #if defined (PETSC_USE_CTABLE)
303           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
304           col--;
305 #else
306           col = aij->colmap[idxn[j]] - 1;
307 #endif
308           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
309           else {
310             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
311           }
312         }
313       }
314     } else {
315       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
316     }
317   }
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNCT__
322 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
323 int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
324 {
325   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
326   int         ierr,nstash,reallocs;
327   InsertMode  addv;
328 
329   PetscFunctionBegin;
330   if (aij->donotstash) {
331     PetscFunctionReturn(0);
332   }
333 
334   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
335   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
336   if (addv == (ADD_VALUES|INSERT_VALUES)) {
337     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
338   }
339   mat->insertmode = addv; /* in case this processor had no cache */
340 
341   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
342   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
343   PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
344   PetscFunctionReturn(0);
345 }
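/*
   Note on the consistency check above: every processor contributes its insertmode
   to a bitwise-OR reduction, so the result equals ADD_VALUES|INSERT_VALUES exactly
   when some processors inserted while others added (this relies on INSERT_VALUES
   and ADD_VALUES being distinct nonzero values in the InsertMode enum).
*/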
346 
347 
348 #undef __FUNCT__
349 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
350 int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
351 {
352   Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
353   Mat_SeqAIJ  *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
354   int         i,j,rstart,ncols,n,ierr,flg;
355   int         *row,*col,other_disassembled;
356   PetscScalar *val;
357   InsertMode  addv = mat->insertmode;
358 #if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
359   PetscTruth  flag;
360 #endif
361 
362   PetscFunctionBegin;
363   if (!aij->donotstash) {
364     while (1) {
365       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
366       if (!flg) break;
367 
368       for (i=0; i<n;) {
369         /* Now identify the consecutive vals belonging to the same row */
370         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
371         if (j < n) ncols = j-i;
372         else       ncols = n-i;
373         /* Now assemble all these values with a single function call */
374         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
375         i = j;
376       }
377     }
378     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
379   }
380 
381   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
382   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
383 
384   /* determine if any processor has disassembled, if so we must
385      also disassemble ourselves, in order that we may reassemble. */
386   /*
387      if nonzero structure of submatrix B cannot change then we know that
388      no processor disassembled thus we can skip this stuff
389   */
390   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
391     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
392     if (mat->was_assembled && !other_disassembled) {
393       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
394     }
395   }
396 
397   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
398     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
399   }
400   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
401   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
402 
403   if (aij->rowvalues) {
404     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
405     aij->rowvalues = 0;
406   }
407 
408   /* used by MatAXPY() */
409   a->xtoy = 0; b->xtoy = 0;
410   a->XtoY = 0; b->XtoY = 0;
411 
412 #if defined(PETSC_HAVE_SUPERLUDIST)
413   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
414   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
415 #endif
416 
417 #if defined(PETSC_HAVE_SPOOLES)
418   ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
419   if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
420 #endif
421   PetscFunctionReturn(0);
422 }
423 
424 #undef __FUNCT__
425 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
426 int MatZeroEntries_MPIAIJ(Mat A)
427 {
428   Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
429   int        ierr;
430 
431   PetscFunctionBegin;
432   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
433   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
434   PetscFunctionReturn(0);
435 }
436 
437 #undef __FUNCT__
438 #define __FUNCT__ "MatZeroRows_MPIAIJ"
439 int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
440 {
441   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
442   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
443   int            *nprocs,j,idx,nsends,row;
444   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
445   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
446   int            *lens,imdex,*lrows,*values,rstart=l->rstart;
447   MPI_Comm       comm = A->comm;
448   MPI_Request    *send_waits,*recv_waits;
449   MPI_Status     recv_status,*send_status;
450   IS             istmp;
451   PetscTruth     found;
452 
453   PetscFunctionBegin;
454   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
455   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
456 
457   /*  first count number of contributors to each processor */
458   ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
459   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
460   ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
461   for (i=0; i<N; i++) {
462     idx = rows[i];
463     found = PETSC_FALSE;
464     for (j=0; j<size; j++) {
465       if (idx >= owners[j] && idx < owners[j+1]) {
466         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
467       }
468     }
469     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
470   }
471   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
472 
473   /* inform other processors of number of messages and max length*/
474   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
475 
476   /* post receives:   */
477   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
478   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
479   for (i=0; i<nrecvs; i++) {
480     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
481   }
482 
483   /* do sends:
484       1) starts[i] gives the starting index in svalues for stuff going to
485          the ith processor
486   */
487   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
488   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
489   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
490   starts[0] = 0;
491   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
492   for (i=0; i<N; i++) {
493     svalues[starts[owner[i]]++] = rows[i];
494   }
495   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
496 
497   starts[0] = 0;
498   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
499   count = 0;
500   for (i=0; i<size; i++) {
501     if (nprocs[2*i+1]) {
502       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
503     }
504   }
505   ierr = PetscFree(starts);CHKERRQ(ierr);
506 
507   base = owners[rank];
508 
509   /*  wait on receives */
510   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
511   source = lens + nrecvs;
512   count  = nrecvs; slen = 0;
513   while (count) {
514     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
515     /* unpack receives into our local space */
516     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
517     source[imdex]  = recv_status.MPI_SOURCE;
518     lens[imdex]    = n;
519     slen          += n;
520     count--;
521   }
522   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
523 
524   /* move the data into the send scatter */
525   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
526   count = 0;
527   for (i=0; i<nrecvs; i++) {
528     values = rvalues + i*nmax;
529     for (j=0; j<lens[i]; j++) {
530       lrows[count++] = values[j] - base;
531     }
532   }
533   ierr = PetscFree(rvalues);CHKERRQ(ierr);
534   ierr = PetscFree(lens);CHKERRQ(ierr);
535   ierr = PetscFree(owner);CHKERRQ(ierr);
536   ierr = PetscFree(nprocs);CHKERRQ(ierr);
537 
538   /* actually zap the local rows */
539   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
540   PetscLogObjectParent(A,istmp);
541 
542   /*
543         Zero the required rows. If the "diagonal block" of the matrix
544      is square and the user wishes to set the diagonal we use separate
545      code so that MatSetValues() is not called for each diagonal entry (which
546      would allocate new memory, triggering many mallocs and slowing things down).
547 
548        Contributed by: Matthew Knepley
549   */
550   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
551   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
552   if (diag && (l->A->M == l->A->N)) {
553     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
554   } else if (diag) {
555     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
556     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
557       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
558 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
559     }
560     for (i = 0; i < slen; i++) {
561       row  = lrows[i] + rstart;
562       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
563     }
564     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
565     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
566   } else {
567     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
568   }
569   ierr = ISDestroy(istmp);CHKERRQ(ierr);
570   ierr = PetscFree(lrows);CHKERRQ(ierr);
571 
572   /* wait on sends */
573   if (nsends) {
574     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
575     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
576     ierr = PetscFree(send_status);CHKERRQ(ierr);
577   }
578   ierr = PetscFree(send_waits);CHKERRQ(ierr);
579   ierr = PetscFree(svalues);CHKERRQ(ierr);
580 
581   PetscFunctionReturn(0);
582 }
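/*
   Minimal usage sketch (not compiled in; the row list is illustrative): rows are
   given by global number in an IS; pass a pointer to the diagonal value to have it
   set, or PETSC_NULL to leave the zeroed rows with an empty diagonal.
*/
#if 0
{
  IS          zrows;
  int         rows[1] = {0},ierr;
  PetscScalar one = 1.0;
  ierr = ISCreateGeneral(PETSC_COMM_SELF,1,rows,&zrows);CHKERRQ(ierr);
  ierr = MatZeroRows(A,zrows,&one);CHKERRQ(ierr);
  ierr = ISDestroy(zrows);CHKERRQ(ierr);
}
#endif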
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatMult_MPIAIJ"
586 int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
587 {
588   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
589   int        ierr,nt;
590 
591   PetscFunctionBegin;
592   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
593   if (nt != A->n) {
594     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
595   }
596   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
597   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
598   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
599   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
600   PetscFunctionReturn(0);
601 }
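/*
   What MatMult_MPIAIJ() computes: with the local rows split as [A_d | B_o]
   (diagonal and off-diagonal blocks), yy = A_d * xx_local + B_o * lvec, where lvec
   holds the needed off-processor (ghost) entries of xx gathered by the Mvctx
   scatter.  The scatter is started before the local multiply so communication
   overlaps with computation.
*/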
602 
603 #undef __FUNCT__
604 #define __FUNCT__ "MatMultAdd_MPIAIJ"
605 int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
606 {
607   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
608   int        ierr;
609 
610   PetscFunctionBegin;
611   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
612   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
613   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
614   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
615   PetscFunctionReturn(0);
616 }
617 
618 #undef __FUNCT__
619 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
620 int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
621 {
622   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
623   int        ierr;
624 
625   PetscFunctionBegin;
626   /* do nondiagonal part */
627   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
628   /* send it on its way */
629   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
630   /* do local part */
631   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
632   /* receive remote parts: note this assumes the values are not actually */
633   /* inserted in yy until the next line, which is true for this implementation */
634   /* but may not hold in general. */
635   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
636   PetscFunctionReturn(0);
637 }
638 
639 EXTERN_C_BEGIN
640 #undef __FUNCT__
641 #define __FUNCT__ "MatIsSymmetric_MPIAIJ"
642 int MatIsSymmetric_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth *f)
643 {
644   Mat_MPIAIJ *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
645   Mat        A = Aij->A, B,Aoff = Aij->B,Boff;
646   MatType    type;
647   IS         Me,Notme;
648   int        M,N,first,last,*notme,i, ierr;
649 
650   PetscFunctionBegin;
651   ierr = MatGetType(Bmat,&type); CHKERRQ(ierr);
652   ierr = PetscStrcmp(type,MATMPIAIJ,f); CHKERRQ(ierr);
653   if (!*f) SETERRQ(1,"Second matrix needs to be MPIAIJ too");
654   Bij = (Mat_MPIAIJ *) Bmat->data; B = Bij->B;
655   ierr = MatIsSymmetric(A,Bij->A,f); CHKERRQ(ierr); /* compare diagonal blocks first */
656   if (!*f) PetscFunctionReturn(0);
657   ierr = MatGetSize(Amat,&M,&N); CHKERRQ(ierr);
658   ierr = MatGetOwnershipRange(Amat,&first,&last); CHKERRQ(ierr);
659   ierr = PetscMalloc((N-last+first)*sizeof(int),&notme); CHKERRQ(ierr);
660   for (i=0; i<first; i++) notme[i] = i;
661   for (i=last; i<M; i++) notme[i-last+first] = i;
662   ierr = ISCreateGeneral
663     (MPI_COMM_SELF,N-last+first,notme,&Notme); CHKERRQ(ierr);
664   ierr = ISCreateStride
665     (MPI_COMM_SELF,last-first,first,1,&Me); CHKERRQ(ierr);
666   ierr = MatGetSubMatrix
667     (B,Notme,Me,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Boff); CHKERRQ(ierr);
668   ierr = MatIsSymmetric(Aoff,Boff,f); CHKERRQ(ierr);
669   ierr = MatDestroy(Boff); CHKERRQ(ierr);
670   PetscFunctionReturn(0);
671 }
672 EXTERN_C_END
673 
674 #undef __FUNCT__
675 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
676 int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
677 {
678   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
679   int        ierr;
680 
681   PetscFunctionBegin;
682   /* do nondiagonal part */
683   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
684   /* send it on its way */
685   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
686   /* do local part */
687   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
688   /* receive remote parts: note this assumes the values are not actually */
689   /* inserted in zz until the next line, which is true for this implementation */
690   /* but may not hold in general. */
691   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
692   PetscFunctionReturn(0);
693 }
694 
695 /*
696   This only works correctly for square matrices where the subblock A->A is the
697    diagonal block
698 */
699 #undef __FUNCT__
700 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
701 int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
702 {
703   int        ierr;
704   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
705 
706   PetscFunctionBegin;
707   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
708   if (a->rstart != a->cstart || a->rend != a->cend) {
709     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
710   }
711   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 
715 #undef __FUNCT__
716 #define __FUNCT__ "MatScale_MPIAIJ"
717 int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
718 {
719   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
720   int        ierr;
721 
722   PetscFunctionBegin;
723   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
724   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatDestroy_MPIAIJ"
730 int MatDestroy_MPIAIJ(Mat mat)
731 {
732   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
733   int        ierr;
734 
735   PetscFunctionBegin;
736 #if defined(PETSC_USE_LOG)
737   PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
738 #endif
739   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
740   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
741   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
742   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
743 #if defined (PETSC_USE_CTABLE)
744   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
745 #else
746   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
747 #endif
748   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
749   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
750   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
751   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
752   ierr = PetscFree(aij);CHKERRQ(ierr);
753   PetscFunctionReturn(0);
754 }
755 
756 extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
757 extern int MatFactorInfo_Spooles(Mat,PetscViewer);
758 
759 #undef __FUNCT__
760 #define __FUNCT__ "MatView_MPIAIJ_Binary"
761 int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
762 {
763   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
764   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
765   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
766   int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
767   int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
768   PetscScalar       *column_values;
769 
770   PetscFunctionBegin;
771   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
772   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
773   nz   = A->nz + B->nz;
774   if (rank == 0) {
775     header[0] = MAT_FILE_COOKIE;
776     header[1] = mat->M;
777     header[2] = mat->N;
778     ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
779     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
780     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
781     /* get largest number of rows any processor has */
782     rlen = mat->m;
783     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
784     for (i=1; i<size; i++) {
785       rlen = PetscMax(rlen,range[i+1] - range[i]);
786     }
787   } else {
788     ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
789     rlen = mat->m;
790   }
791 
792   /* load up the local row counts */
793   ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
794   for (i=0; i<mat->m; i++) {
795     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
796   }
797 
798   /* store the row lengths to the file */
799   if (rank == 0) {
800     MPI_Status status;
801     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
802     for (i=1; i<size; i++) {
803       rlen = range[i+1] - range[i];
804       ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
805       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
806     }
807   } else {
808     ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
809   }
810   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
811 
812   /* load up the local column indices */
813   nzmax = nz; /* processor 0 needs as much space as the largest processor needs */
814   ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
815   ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
816   cnt  = 0;
817   for (i=0; i<mat->m; i++) {
818     for (j=B->i[i]; j<B->i[i+1]; j++) {
819       if ( (col = garray[B->j[j]]) > cstart) break;
820       column_indices[cnt++] = col;
821     }
822     for (k=A->i[i]; k<A->i[i+1]; k++) {
823       column_indices[cnt++] = A->j[k] + cstart;
824     }
825     for (; j<B->i[i+1]; j++) {
826       column_indices[cnt++] = garray[B->j[j]];
827     }
828   }
829   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
830 
831   /* store the column indices to the file */
832   if (rank == 0) {
833     MPI_Status status;
834     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
835     for (i=1; i<size; i++) {
836       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
837       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
838       ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
839       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
840     }
841   } else {
842     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
843     ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
844   }
845   ierr = PetscFree(column_indices);CHKERRQ(ierr);
846 
847   /* load up the local column values */
848   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
849   cnt  = 0;
850   for (i=0; i<mat->m; i++) {
851     for (j=B->i[i]; j<B->i[i+1]; j++) {
852       if ( garray[B->j[j]] > cstart) break;
853       column_values[cnt++] = B->a[j];
854     }
855     for (k=A->i[i]; k<A->i[i+1]; k++) {
856       column_values[cnt++] = A->a[k];
857     }
858     for (; j<B->i[i+1]; j++) {
859       column_values[cnt++] = B->a[j];
860     }
861   }
862   if (cnt != A->nz + B->nz) SETERRQ2(1,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);
863 
864   /* store the column values to the file */
865   if (rank == 0) {
866     MPI_Status status;
867     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
868     for (i=1; i<size; i++) {
869       ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
870       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
871       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
872       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
873     }
874   } else {
875     ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
876     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
877   }
878   ierr = PetscFree(column_values);CHKERRQ(ierr);
879   PetscFunctionReturn(0);
880 }
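/*
   Layout of the binary file written above: a 4-int header
   [MAT_FILE_COOKIE, M, N, global nz], then the row lengths of every row (rank 0
   first, then each rank in order), then all column indices in global numbering
   (per row: off-diagonal columns below cstart, diagonal-block columns, remaining
   off-diagonal columns), then the matrix values in the same order.
*/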
881 
882 #undef __FUNCT__
883 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
884 int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
885 {
886   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
887   Mat_SeqAIJ*       C = (Mat_SeqAIJ*)aij->A->data;
888   int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
889   PetscTruth        isdraw,isascii,flg,isbinary;
890   PetscViewer       sviewer;
891   PetscViewerFormat format;
892 
893   PetscFunctionBegin;
894   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
895   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
896   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
897   if (isascii) {
898     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
899     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
900       MatInfo info;
901       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
902       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
903       ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
904       if (flg) {
905         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
906 					      rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
907       } else {
908         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
909 		    rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
910       }
911       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
912       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
913       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
914       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
915       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
916       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
917       PetscFunctionReturn(0);
918     } else if (format == PETSC_VIEWER_ASCII_INFO) {
919       PetscFunctionReturn(0);
920     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
921 #if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
922       ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
923 #endif
924 #if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE)
925       ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
926 #endif
927 
928       PetscFunctionReturn(0);
929     }
930   } else if (isbinary) {
931     if (size == 1) {
932       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
933       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
934     } else {
935       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
936     }
937     PetscFunctionReturn(0);
938   } else if (isdraw) {
939     PetscDraw  draw;
940     PetscTruth isnull;
941     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
942     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
943   }
944 
945   if (size == 1) {
946     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
947     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
948   } else {
949     /* assemble the entire matrix onto first processor. */
950     Mat         A;
951     Mat_SeqAIJ *Aloc;
952     int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
953     PetscScalar *a;
954 
955     if (!rank) {
956       ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
957     } else {
958       ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
959     }
960     PetscLogObjectParent(mat,A);
961 
962     /* copy over the A part */
963     Aloc = (Mat_SeqAIJ*)aij->A->data;
964     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
965     row = aij->rstart;
966     for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
967     for (i=0; i<m; i++) {
968       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
969       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
970     }
971     aj = Aloc->j;
972     for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}
973 
974     /* copy over the B part */
975     Aloc = (Mat_SeqAIJ*)aij->B->data;
976     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
977     row  = aij->rstart;
978     ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
979     ct   = cols;
980     for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
981     for (i=0; i<m; i++) {
982       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
983       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
984     }
985     ierr = PetscFree(ct);CHKERRQ(ierr);
986     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
987     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
988     /*
989        Everyone has to call to draw the matrix since the graphics waits are
990        synchronized across all processors that share the PetscDraw object
991     */
992     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
993     if (!rank) {
994       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
995       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
996     }
997     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
998     ierr = MatDestroy(A);CHKERRQ(ierr);
999   }
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 #undef __FUNCT__
1004 #define __FUNCT__ "MatView_MPIAIJ"
1005 int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1006 {
1007   int        ierr;
1008   PetscTruth isascii,isdraw,issocket,isbinary;
1009 
1010   PetscFunctionBegin;
1011   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1012   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1013   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1014   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1015   if (isascii || isdraw || isbinary || issocket) {
1016     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1017   } else {
1018     SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
1019   }
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatRelax_MPIAIJ"
1027 int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
1028 {
1029   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1030   int          ierr;
1031   Vec          bb1;
1032   PetscScalar  mone=-1.0;
1033 
1034   PetscFunctionBegin;
1035   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
1036 
1037   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1038 
1039   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1040     if (flag & SOR_ZERO_INITIAL_GUESS) {
1041       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1042       its--;
1043     }
1044 
1045     while (its--) {
1046       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1047       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1048 
1049       /* update rhs: bb1 = bb - B*x */
1050       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1051       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1052 
1053       /* local sweep */
1054       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1055       CHKERRQ(ierr);
1056     }
1057   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1058     if (flag & SOR_ZERO_INITIAL_GUESS) {
1059       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1060       its--;
1061     }
1062     while (its--) {
1063       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1064       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1065 
1066       /* update rhs: bb1 = bb - B*x */
1067       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1068       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1069 
1070       /* local sweep */
1071       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1072       CHKERRQ(ierr);
1073     }
1074   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1075     if (flag & SOR_ZERO_INITIAL_GUESS) {
1076       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1077       its--;
1078     }
1079     while (its--) {
1080       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1081       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1082 
1083       /* update rhs: bb1 = bb - B*x */
1084       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1085       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1086 
1087       /* local sweep */
1088       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1089       CHKERRQ(ierr);
1090     }
1091   } else {
1092     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1093   }
1094 
1095   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
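/*
   How the local relaxation above works: each outer iteration gathers the current
   ghost values of xx into mat->lvec, forms bb1 = bb - B*lvec (implemented by scaling
   lvec by -1 and calling multadd), and then runs the sequential SOR sweep of the
   requested direction on the diagonal block with bb1 as the right-hand side.  True
   parallel (coupled) SOR is not supported.
*/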
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1101 int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1102 {
1103   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
1104   Mat        A = mat->A,B = mat->B;
1105   int        ierr;
1106   PetscReal  isend[5],irecv[5];
1107 
1108   PetscFunctionBegin;
1109   info->block_size     = 1.0;
1110   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1111   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1112   isend[3] = info->memory;  isend[4] = info->mallocs;
1113   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1114   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1115   isend[3] += info->memory;  isend[4] += info->mallocs;
1116   if (flag == MAT_LOCAL) {
1117     info->nz_used      = isend[0];
1118     info->nz_allocated = isend[1];
1119     info->nz_unneeded  = isend[2];
1120     info->memory       = isend[3];
1121     info->mallocs      = isend[4];
1122   } else if (flag == MAT_GLOBAL_MAX) {
1123     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1124     info->nz_used      = irecv[0];
1125     info->nz_allocated = irecv[1];
1126     info->nz_unneeded  = irecv[2];
1127     info->memory       = irecv[3];
1128     info->mallocs      = irecv[4];
1129   } else if (flag == MAT_GLOBAL_SUM) {
1130     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1131     info->nz_used      = irecv[0];
1132     info->nz_allocated = irecv[1];
1133     info->nz_unneeded  = irecv[2];
1134     info->memory       = irecv[3];
1135     info->mallocs      = irecv[4];
1136   }
1137   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1138   info->fill_ratio_needed = 0;
1139   info->factor_mallocs    = 0;
1140   info->rows_global       = (double)matin->M;
1141   info->columns_global    = (double)matin->N;
1142   info->rows_local        = (double)matin->m;
1143   info->columns_local     = (double)matin->N;
1144 
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatSetOption_MPIAIJ"
1150 int MatSetOption_MPIAIJ(Mat A,MatOption op)
1151 {
1152   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
1153   int        ierr;
1154 
1155   PetscFunctionBegin;
1156   switch (op) {
1157   case MAT_NO_NEW_NONZERO_LOCATIONS:
1158   case MAT_YES_NEW_NONZERO_LOCATIONS:
1159   case MAT_COLUMNS_UNSORTED:
1160   case MAT_COLUMNS_SORTED:
1161   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1162   case MAT_KEEP_ZEROED_ROWS:
1163   case MAT_NEW_NONZERO_LOCATION_ERR:
1164   case MAT_USE_INODES:
1165   case MAT_DO_NOT_USE_INODES:
1166   case MAT_IGNORE_ZERO_ENTRIES:
1167     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1168     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1169     break;
1170   case MAT_ROW_ORIENTED:
1171     a->roworiented = PETSC_TRUE;
1172     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1173     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1174     break;
1175   case MAT_ROWS_SORTED:
1176   case MAT_ROWS_UNSORTED:
1177   case MAT_YES_NEW_DIAGONALS:
1178     PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
1179     break;
1180   case MAT_COLUMN_ORIENTED:
1181     a->roworiented = PETSC_FALSE;
1182     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1183     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1184     break;
1185   case MAT_IGNORE_OFF_PROC_ENTRIES:
1186     a->donotstash = PETSC_TRUE;
1187     break;
1188   case MAT_NO_NEW_DIAGONALS:
1189     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1190   default:
1191     SETERRQ(PETSC_ERR_SUP,"unknown option");
1192   }
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatGetRow_MPIAIJ"
1198 int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1199 {
1200   Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
1201   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1202   int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1203   int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1204   int          *cmap,*idx_p;
1205 
1206   PetscFunctionBegin;
1207   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1208   mat->getrowactive = PETSC_TRUE;
1209 
1210   if (!mat->rowvalues && (idx || v)) {
1211     /*
1212         allocate enough space to hold information from the longest row.
1213     */
1214     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1215     int     max = 1,tmp;
1216     for (i=0; i<matin->m; i++) {
1217       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1218       if (max < tmp) { max = tmp; }
1219     }
1220     ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1221     mat->rowindices = (int*)(mat->rowvalues + max);
1222   }
1223 
1224   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1225   lrow = row - rstart;
1226 
1227   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1228   if (!v)   {pvA = 0; pvB = 0;}
1229   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1230   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1231   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1232   nztot = nzA + nzB;
1233 
1234   cmap  = mat->garray;
1235   if (v  || idx) {
1236     if (nztot) {
1237       /* Sort by increasing column numbers, assuming A and B already sorted */
1238       int imark = -1;
1239       if (v) {
1240         *v = v_p = mat->rowvalues;
1241         for (i=0; i<nzB; i++) {
1242           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1243           else break;
1244         }
1245         imark = i;
1246         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1247         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1248       }
1249       if (idx) {
1250         *idx = idx_p = mat->rowindices;
1251         if (imark > -1) {
1252           for (i=0; i<imark; i++) {
1253             idx_p[i] = cmap[cworkB[i]];
1254           }
1255         } else {
1256           for (i=0; i<nzB; i++) {
1257             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1258             else break;
1259           }
1260           imark = i;
1261         }
1262         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1263         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1264       }
1265     } else {
1266       if (idx) *idx = 0;
1267       if (v)   *v   = 0;
1268     }
1269   }
1270   *nz = nztot;
1271   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1272   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
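/*
   Minimal usage sketch (not compiled in): only rows in this processor's ownership
   range [rstart,rend) may be requested, and MatRestoreRow() must be called before
   the next MatGetRow().
*/
#if 0
{
  int         row = 0,nz,*cols,ierr;   /* row must be locally owned */
  PetscScalar *vals;
  ierr = MatGetRow(matin,row,&nz,&cols,&vals);CHKERRQ(ierr);
  /* ... use nz, cols[], vals[] ... */
  ierr = MatRestoreRow(matin,row,&nz,&cols,&vals);CHKERRQ(ierr);
}
#endif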
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1278 int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1279 {
1280   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1281 
1282   PetscFunctionBegin;
1283   if (aij->getrowactive == PETSC_FALSE) {
1284     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1285   }
1286   aij->getrowactive = PETSC_FALSE;
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatNorm_MPIAIJ"
1292 int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1293 {
1294   Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
1295   Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1296   int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
1297   PetscReal    sum = 0.0;
1298   PetscScalar  *v;
1299 
1300   PetscFunctionBegin;
1301   if (aij->size == 1) {
1302     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1303   } else {
1304     if (type == NORM_FROBENIUS) {
1305       v = amat->a;
1306       for (i=0; i<amat->nz; i++) {
1307 #if defined(PETSC_USE_COMPLEX)
1308         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1309 #else
1310         sum += (*v)*(*v); v++;
1311 #endif
1312       }
1313       v = bmat->a;
1314       for (i=0; i<bmat->nz; i++) {
1315 #if defined(PETSC_USE_COMPLEX)
1316         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1317 #else
1318         sum += (*v)*(*v); v++;
1319 #endif
1320       }
1321       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1322       *norm = sqrt(*norm);
1323     } else if (type == NORM_1) { /* max column norm */
1324       PetscReal *tmp,*tmp2;
1325       int    *jj,*garray = aij->garray;
1326       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1327       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1328       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1329       *norm = 0.0;
1330       v = amat->a; jj = amat->j;
1331       for (j=0; j<amat->nz; j++) {
1332         tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
1333       }
1334       v = bmat->a; jj = bmat->j;
1335       for (j=0; j<bmat->nz; j++) {
1336         tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
1337       }
1338       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1339       for (j=0; j<mat->N; j++) {
1340         if (tmp2[j] > *norm) *norm = tmp2[j];
1341       }
1342       ierr = PetscFree(tmp);CHKERRQ(ierr);
1343       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1344     } else if (type == NORM_INFINITY) { /* max row norm */
1345       PetscReal ntemp = 0.0;
1346       for (j=0; j<aij->A->m; j++) {
1347         v = amat->a + amat->i[j] + shift;
1348         sum = 0.0;
1349         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1350           sum += PetscAbsScalar(*v); v++;
1351         }
1352         v = bmat->a + bmat->i[j] + shift;
1353         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1354           sum += PetscAbsScalar(*v); v++;
1355         }
1356         if (sum > ntemp) ntemp = sum;
1357       }
1358       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1359     } else {
1360       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1361     }
1362   }
1363   PetscFunctionReturn(0);
1364 }
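/*
   Norms computed above, written out:
     NORM_FROBENIUS: sqrt( sum_ij |a_ij|^2 )  (local sums over A and B, reduced with MPI_SUM)
     NORM_1:         max_j sum_i |a_ij|       (column sums accumulated in a length-N array)
     NORM_INFINITY:  max_i sum_j |a_ij|       (local row sums, reduced with MPI_MAX)
*/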
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatTranspose_MPIAIJ"
1368 int MatTranspose_MPIAIJ(Mat A,Mat *matout)
1369 {
1370   Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
1371   Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
1372   int          ierr,shift = Aloc->indexshift;
1373   int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1374   Mat          B;
1375   PetscScalar  *array;
1376 
1377   PetscFunctionBegin;
1378   if (!matout && M != N) {
1379     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1380   }
1381 
1382   ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1383 
1384   /* copy over the A part */
1385   Aloc = (Mat_SeqAIJ*)a->A->data;
1386   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1387   row = a->rstart;
1388   for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
1389   for (i=0; i<m; i++) {
1390     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1391     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1392   }
1393   aj = Aloc->j;
1394   for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}
1395 
1396   /* copy over the B part */
1397   Aloc = (Mat_SeqAIJ*)a->B->data;
1398   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1399   row  = a->rstart;
1400   ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
1401   ct   = cols;
1402   for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
1403   for (i=0; i<m; i++) {
1404     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1405     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1406   }
1407   ierr = PetscFree(ct);CHKERRQ(ierr);
1408   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1409   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1410   if (matout) {
1411     *matout = B;
1412   } else {
1413     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1414   }
1415   PetscFunctionReturn(0);
1416 }
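
/*
   Usage sketch (assuming the generic MatTranspose() has the same calling sequence
   as the routine above): form an explicit transpose, or transpose a square matrix
   in place by passing PETSC_NULL for the output matrix.

     Mat At;
     ierr = MatTranspose(A,&At);CHKERRQ(ierr);          [new matrix holding the transpose]
     ierr = MatTranspose(A,PETSC_NULL);CHKERRQ(ierr);   [in place; square matrices only]
*/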
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1420 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1421 {
1422   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1423   Mat        a = aij->A,b = aij->B;
1424   int        ierr,s1,s2,s3;
1425 
1426   PetscFunctionBegin;
1427   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1428   if (rr) {
1429     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1430     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1431     /* Overlap communication with computation. */
1432     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1433   }
1434   if (ll) {
1435     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1436     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1437     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1438   }
1439   /* scale the diagonal block */
1440   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1441 
1442   if (rr) {
1443     /* Do a scatter end and then right scale the off-diagonal block */
1444     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1445     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1446   }
1447 
1448   PetscFunctionReturn(0);
1449 }
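
/*
   Usage sketch (an illustration; comm, l and r are assumed to be set up by the caller):
   scale an assembled MPIAIJ matrix A as A <- diag(l) A diag(r).  The left vector must
   share the row layout of A and the right vector its column layout.

     Vec l,r;
     int m,n;
     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = VecCreateMPI(comm,m,PETSC_DETERMINE,&l);CHKERRQ(ierr);
     ierr = VecCreateMPI(comm,n,PETSC_DETERMINE,&r);CHKERRQ(ierr);
     [fill l and r, e.g. with VecSetValues(), then assemble them]
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(l);CHKERRQ(ierr);
     ierr = VecDestroy(r);CHKERRQ(ierr);
*/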
1450 
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1454 int MatPrintHelp_MPIAIJ(Mat A)
1455 {
1456   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1457   int        ierr;
1458 
1459   PetscFunctionBegin;
1460   if (!a->rank) {
1461     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1468 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1469 {
1470   PetscFunctionBegin;
1471   *bs = 1;
1472   PetscFunctionReturn(0);
1473 }
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1476 int MatSetUnfactored_MPIAIJ(Mat A)
1477 {
1478   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1479   int        ierr;
1480 
1481   PetscFunctionBegin;
1482   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatEqual_MPIAIJ"
1488 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1489 {
1490   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1491   Mat        a,b,c,d;
1492   PetscTruth flg;
1493   int        ierr;
1494 
1495   PetscFunctionBegin;
1496   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1497   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1498   a = matA->A; b = matA->B;
1499   c = matB->A; d = matB->B;
1500 
1501   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1502   if (flg == PETSC_TRUE) {
1503     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1504   }
1505   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatCopy_MPIAIJ"
1511 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1512 {
1513   int        ierr;
1514   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1515   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1516   PetscTruth flg;
1517 
1518   PetscFunctionBegin;
1519   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1520   if (str != SAME_NONZERO_PATTERN || !flg) {
1521     /* because of the column compression in the off-processor part of the matrix a->B,
1522        the number of columns in a->B and b->B may be different, hence we cannot call
1523        MatCopy() directly on the two parts. If need be, we could provide a copy more
1524        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1525        then copying the submatrices */
1526     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1527   } else {
1528     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1529     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1530   }
1531   PetscFunctionReturn(0);
1532 }
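
/*
   Usage sketch (an illustration): duplicate the layout and nonzero pattern of A and
   then copy its values; with SAME_NONZERO_PATTERN the copy is done block by block
   by the routine above, otherwise it falls back to MatCopy_Basic().

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/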
1533 
1534 #undef __FUNCT__
1535 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1536 int MatSetUpPreallocation_MPIAIJ(Mat A)
1537 {
1538   int        ierr;
1539 
1540   PetscFunctionBegin;
1541   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1546 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1547 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1548 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1549 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1550 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1551 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatFactorInfo*,Mat*);
1552 #endif
1553 
1554 #include "petscblaslapack.h"
1555 extern int MatAXPY_SeqAIJ(PetscScalar *,Mat,Mat,MatStructure);
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatAXPY_MPIAIJ"
1558 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1559 {
1560   int        ierr,one=1,i;
1561   Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1562   Mat_SeqAIJ *x,*y;
1563 
1564   PetscFunctionBegin;
1565   if (str == SAME_NONZERO_PATTERN) {
1566     x = (Mat_SeqAIJ *)xx->A->data;
1567     y = (Mat_SeqAIJ *)yy->A->data;
1568     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1569     x = (Mat_SeqAIJ *)xx->B->data;
1570     y = (Mat_SeqAIJ *)yy->B->data;
1571     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1572   } else if (str == SUBSET_NONZERO_PATTERN) {
1573     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1574 
1575     x = (Mat_SeqAIJ *)xx->B->data;
1576     y = (Mat_SeqAIJ *)yy->B->data;
1577     if (y->xtoy && y->XtoY != xx->B) {
1578       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1579       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1580     }
1581     if (!y->xtoy) { /* get xtoy */
1582       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1583       y->XtoY = xx->B;
1584     }
1585     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1586   } else {
1587     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1588   }
1589   PetscFunctionReturn(0);
1590 }
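
/*
   Usage sketch (assuming the generic MatAXPY() takes the same arguments as this
   routine): compute Y <- Y + a*X for two MPIAIJ matrices that share the same
   nonzero pattern.

     PetscScalar a = 2.0;
     ierr = MatAXPY(&a,X,Y,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/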
1591 
1592 /* -------------------------------------------------------------------*/
1593 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1594        MatGetRow_MPIAIJ,
1595        MatRestoreRow_MPIAIJ,
1596        MatMult_MPIAIJ,
1597        MatMultAdd_MPIAIJ,
1598        MatMultTranspose_MPIAIJ,
1599        MatMultTransposeAdd_MPIAIJ,
1600        0,
1601        0,
1602        0,
1603        0,
1604        0,
1605        0,
1606        MatRelax_MPIAIJ,
1607        MatTranspose_MPIAIJ,
1608        MatGetInfo_MPIAIJ,
1609        MatEqual_MPIAIJ,
1610        MatGetDiagonal_MPIAIJ,
1611        MatDiagonalScale_MPIAIJ,
1612        MatNorm_MPIAIJ,
1613        MatAssemblyBegin_MPIAIJ,
1614        MatAssemblyEnd_MPIAIJ,
1615        0,
1616        MatSetOption_MPIAIJ,
1617        MatZeroEntries_MPIAIJ,
1618        MatZeroRows_MPIAIJ,
1619 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1620        MatLUFactorSymbolic_MPIAIJ_TFS,
1621 #else
1622        0,
1623 #endif
1624        0,
1625        0,
1626        0,
1627        MatSetUpPreallocation_MPIAIJ,
1628        0,
1629        0,
1630        0,
1631        0,
1632        MatDuplicate_MPIAIJ,
1633        0,
1634        0,
1635        0,
1636        0,
1637        MatAXPY_MPIAIJ,
1638        MatGetSubMatrices_MPIAIJ,
1639        MatIncreaseOverlap_MPIAIJ,
1640        MatGetValues_MPIAIJ,
1641        MatCopy_MPIAIJ,
1642        MatPrintHelp_MPIAIJ,
1643        MatScale_MPIAIJ,
1644        0,
1645        0,
1646        0,
1647        MatGetBlockSize_MPIAIJ,
1648        0,
1649        0,
1650        0,
1651        0,
1652        MatFDColoringCreate_MPIAIJ,
1653        0,
1654        MatSetUnfactored_MPIAIJ,
1655        0,
1656        0,
1657        MatGetSubMatrix_MPIAIJ,
1658        MatDestroy_MPIAIJ,
1659        MatView_MPIAIJ,
1660        MatGetPetscMaps_Petsc,
1661        0,
1662        0,
1663        0,
1664        0,
1665        0,
1666        0,
1667        0,
1668        0,
1669        MatSetColoring_MPIAIJ,
1670        MatSetValuesAdic_MPIAIJ,
1671        MatSetValuesAdifor_MPIAIJ
1672 };
1673 
1674 /* ----------------------------------------------------------------------------------------*/
1675 
1676 EXTERN_C_BEGIN
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1679 int MatStoreValues_MPIAIJ(Mat mat)
1680 {
1681   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1682   int        ierr;
1683 
1684   PetscFunctionBegin;
1685   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1686   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1687   PetscFunctionReturn(0);
1688 }
1689 EXTERN_C_END
1690 
1691 EXTERN_C_BEGIN
1692 #undef __FUNCT__
1693 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1694 int MatRetrieveValues_MPIAIJ(Mat mat)
1695 {
1696   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1697   int        ierr;
1698 
1699   PetscFunctionBegin;
1700   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1701   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 EXTERN_C_END
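
/*
   Usage sketch (an illustration): save and later restore the numerical values of a
   matrix whose nonzero pattern does not change, typically together with
   MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS) so the pattern stays fixed.

     ierr = MatStoreValues(mat);CHKERRQ(ierr);      [saves the values of both blocks]
     [overwrite the values, e.g. during a new assembly with the same pattern]
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   [restores the saved values]
*/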
1705 
1706 #include "petscpc.h"
1707 EXTERN_C_BEGIN
1708 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1709 EXTERN_C_END
1710 
1711 EXTERN_C_BEGIN
1712 #undef __FUNCT__
1713 #define __FUNCT__ "MatCreate_MPIAIJ"
1714 int MatCreate_MPIAIJ(Mat B)
1715 {
1716   Mat_MPIAIJ *b;
1717   int        ierr,i,size;
1718 
1719   PetscFunctionBegin;
1720   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1721 
1722   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1723   B->data         = (void*)b;
1724   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1725   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1726   B->factor       = 0;
1727   B->assembled    = PETSC_FALSE;
1728   B->mapping      = 0;
1729 
1730   B->insertmode      = NOT_SET_VALUES;
1731   b->size            = size;
1732   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1733 
1734   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1735   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1736 
1737   /* the information in the maps duplicates the information computed below, eventually
1738      we should remove the duplicate information that is not contained in the maps */
1739   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1740   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1741 
1742   /* build local table of row and column ownerships */
1743   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1744   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1745   b->cowners = b->rowners + b->size + 2;
1746   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1747   b->rowners[0] = 0;
1748   for (i=2; i<=b->size; i++) {
1749     b->rowners[i] += b->rowners[i-1];
1750   }
1751   b->rstart = b->rowners[b->rank];
1752   b->rend   = b->rowners[b->rank+1];
1753   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1754   b->cowners[0] = 0;
1755   for (i=2; i<=b->size; i++) {
1756     b->cowners[i] += b->cowners[i-1];
1757   }
1758   b->cstart = b->cowners[b->rank];
1759   b->cend   = b->cowners[b->rank+1];
1760 
1761   /* build cache for off array entries formed */
1762   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1763   b->donotstash  = PETSC_FALSE;
1764   b->colmap      = 0;
1765   b->garray      = 0;
1766   b->roworiented = PETSC_TRUE;
1767 
1768   /* stuff used for matrix vector multiply */
1769   b->lvec      = PETSC_NULL;
1770   b->Mvctx     = PETSC_NULL;
1771 
1772   /* stuff for MatGetRow() */
1773   b->rowindices   = 0;
1774   b->rowvalues    = 0;
1775   b->getrowactive = PETSC_FALSE;
1776 
1777   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1778                                      "MatStoreValues_MPIAIJ",
1779                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1780   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1781                                      "MatRetrieveValues_MPIAIJ",
1782                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1783   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1784 				     "MatGetDiagonalBlock_MPIAIJ",
1785                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1786   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsSymmetric_C",
1787 				     "MatIsSymmetric_MPIAIJ",
1788 				     MatIsSymmetric_MPIAIJ); CHKERRQ(ierr);
1789   PetscFunctionReturn(0);
1790 }
1791 EXTERN_C_END
1792 
1793 #undef __FUNCT__
1794 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1795 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1796 {
1797   Mat        mat;
1798   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1799   int        ierr;
1800 
1801   PetscFunctionBegin;
1802   *newmat       = 0;
1803   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1804   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1805   a    = (Mat_MPIAIJ*)mat->data;
1806   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1807   mat->factor       = matin->factor;
1808   mat->assembled    = PETSC_TRUE;
1809   mat->insertmode   = NOT_SET_VALUES;
1810   mat->preallocated = PETSC_TRUE;
1811 
1812   a->rstart       = oldmat->rstart;
1813   a->rend         = oldmat->rend;
1814   a->cstart       = oldmat->cstart;
1815   a->cend         = oldmat->cend;
1816   a->size         = oldmat->size;
1817   a->rank         = oldmat->rank;
1818   a->donotstash   = oldmat->donotstash;
1819   a->roworiented  = oldmat->roworiented;
1820   a->rowindices   = 0;
1821   a->rowvalues    = 0;
1822   a->getrowactive = PETSC_FALSE;
1823 
1824   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1825   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1826   if (oldmat->colmap) {
1827 #if defined (PETSC_USE_CTABLE)
1828     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1829 #else
1830     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1831     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1832     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1833 #endif
1834   } else a->colmap = 0;
1835   if (oldmat->garray) {
1836     int len;
1837     len  = oldmat->B->n;
1838     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1839     PetscLogObjectMemory(mat,len*sizeof(int));
1840     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1841   } else a->garray = 0;
1842 
1843   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1844   PetscLogObjectParent(mat,a->lvec);
1845   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1846   PetscLogObjectParent(mat,a->Mvctx);
1847   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1848   PetscLogObjectParent(mat,a->A);
1849   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1850   PetscLogObjectParent(mat,a->B);
1851   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1852   *newmat = mat;
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 #include "petscsys.h"
1857 
1858 EXTERN_C_BEGIN
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatLoad_MPIAIJ"
1861 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1862 {
1863   Mat          A;
1864   PetscScalar  *vals,*svals;
1865   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1866   MPI_Status   status;
1867   int          i,nz,ierr,j,rstart,rend,fd;
1868   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1869   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1870   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1871 #if defined(PETSC_HAVE_SPOOLES) || defined(PETSC_HAVE_SUPERLUDIST)
1872   PetscTruth   flag;
1873 #endif
1874 
1875   PetscFunctionBegin;
1876   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1877   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1878   if (!rank) {
1879     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1880     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1881     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1882     if (header[3] < 0) {
1883       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1884     }
1885   }
1886 
1887   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1888   M = header[1]; N = header[2];
1889   /* determine ownership of all rows */
1890   m = M/size + ((M % size) > rank);
1891   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1892   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1893   rowners[0] = 0;
1894   for (i=2; i<=size; i++) {
1895     rowners[i] += rowners[i-1];
1896   }
1897   rstart = rowners[rank];
1898   rend   = rowners[rank+1];
1899 
1900   /* distribute row lengths to all processors */
1901   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1902   offlens = ourlens + (rend-rstart);
1903   if (!rank) {
1904     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1905     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1906     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1907     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1908     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1909     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1910   } else {
1911     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1912   }
1913 
1914   if (!rank) {
1915     /* calculate the number of nonzeros on each processor */
1916     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1917     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1918     for (i=0; i<size; i++) {
1919       for (j=rowners[i]; j< rowners[i+1]; j++) {
1920         procsnz[i] += rowlengths[j];
1921       }
1922     }
1923     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1924 
1925     /* determine max buffer needed and allocate it */
1926     maxnz = 0;
1927     for (i=0; i<size; i++) {
1928       maxnz = PetscMax(maxnz,procsnz[i]);
1929     }
1930     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1931 
1932     /* read in my part of the matrix column indices  */
1933     nz   = procsnz[0];
1934     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1935     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1936 
1937     /* read in everyone else's and ship them off */
1938     for (i=1; i<size; i++) {
1939       nz   = procsnz[i];
1940       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1941       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1942     }
1943     ierr = PetscFree(cols);CHKERRQ(ierr);
1944   } else {
1945     /* determine buffer space needed for message */
1946     nz = 0;
1947     for (i=0; i<m; i++) {
1948       nz += ourlens[i];
1949     }
1950     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1951 
1952     /* receive message of column indices */
1953     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1954     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1955     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1956   }
1957 
1958   /* determine column ownership if matrix is not square */
1959   if (N != M) {
1960     n      = N/size + ((N % size) > rank);
1961     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1962     cstart = cend - n;
1963   } else {
1964     cstart = rstart;
1965     cend   = rend;
1966     n      = cend - cstart;
1967   }
1968 
1969   /* loop over local rows, determining number of off diagonal entries */
1970   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1971   jj = 0;
1972   for (i=0; i<m; i++) {
1973     for (j=0; j<ourlens[i]; j++) {
1974       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1975       jj++;
1976     }
1977   }
1978 
1979   /* create our matrix */
1980   for (i=0; i<m; i++) {
1981     ourlens[i] -= offlens[i];
1982   }
1983   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1984   A = *newmat;
1985   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1986   for (i=0; i<m; i++) {
1987     ourlens[i] += offlens[i];
1988   }
1989 
1990   if (!rank) {
1991     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1992 
1993     /* read in my part of the matrix numerical values  */
1994     nz   = procsnz[0];
1995     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1996 
1997     /* insert into matrix */
1998     jj      = rstart;
1999     smycols = mycols;
2000     svals   = vals;
2001     for (i=0; i<m; i++) {
2002       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2003       smycols += ourlens[i];
2004       svals   += ourlens[i];
2005       jj++;
2006     }
2007 
2008     /* read in other processors and ship out */
2009     for (i=1; i<size; i++) {
2010       nz   = procsnz[i];
2011       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2012       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2013     }
2014     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2015   } else {
2016     /* receive numeric values */
2017     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2018 
2019     /* receive message of values */
2020     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2021     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2022     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2023 
2024     /* insert into matrix */
2025     jj      = rstart;
2026     smycols = mycols;
2027     svals   = vals;
2028     for (i=0; i<m; i++) {
2029       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2030       smycols += ourlens[i];
2031       svals   += ourlens[i];
2032       jj++;
2033     }
2034   }
2035   ierr = PetscFree(ourlens);CHKERRQ(ierr);
2036   ierr = PetscFree(vals);CHKERRQ(ierr);
2037   ierr = PetscFree(mycols);CHKERRQ(ierr);
2038   ierr = PetscFree(rowners);CHKERRQ(ierr);
2039 
2040   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2041   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2042 #if defined(PETSC_HAVE_SPOOLES)
2043   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
2044   if (flag) {
2045     if (size == 1) {
2046       ierr = MatUseSpooles_SeqAIJ(A);CHKERRQ(ierr);
2047     } else {
2048       ierr = MatUseSpooles_MPIAIJ(A);CHKERRQ(ierr);
2049     }
2050   }
2051 #endif
2052 #if defined(PETSC_HAVE_SUPERLUDIST)
2053   ierr = PetscOptionsHasName(A->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
2054   if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr); }
2055 #endif
2056   PetscFunctionReturn(0);
2057 }
2058 EXTERN_C_END
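
/*
   The binary file read above contains an int header[4] whose first entry is
   MAT_FILE_COOKIE and whose next two entries are the global sizes M and N (a
   negative fourth entry flags a special format that cannot be loaded as MPIAIJ),
   followed by the M row lengths, the column indices and finally the numerical
   values.  A minimal loading sketch (assuming the generic MatLoad() dispatches to
   the routine above and that PETSC_BINARY_RDONLY is the read-only open mode):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(comm,"matrix.dat",PETSC_BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/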
2059 
2060 #undef __FUNCT__
2061 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2062 /*
2063     Not great since it makes two copies of the submatrix: first a sequential SeqAIJ
2064   submatrix on each process, and then the final result by concatenating the local
2065   matrices. Writing it directly would look much like MatGetSubMatrices_MPIAIJ().
2066 */
2067 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
2068 {
2069   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
2070   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2071   Mat          *local,M,Mreuse;
2072   PetscScalar  *vwork,*aa;
2073   MPI_Comm     comm = mat->comm;
2074   Mat_SeqAIJ   *aij;
2075 
2076 
2077   PetscFunctionBegin;
2078   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2079   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2080 
2081   if (call ==  MAT_REUSE_MATRIX) {
2082     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2083     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2084     local = &Mreuse;
2085     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2086   } else {
2087     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2088     Mreuse = *local;
2089     ierr   = PetscFree(local);CHKERRQ(ierr);
2090   }
2091 
2092   /*
2093       m - number of local rows
2094       n - number of columns (same on all processors)
2095       rstart - first row in new global matrix generated
2096   */
2097   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2098   if (call == MAT_INITIAL_MATRIX) {
2099     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2100     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2101     ii  = aij->i;
2102     jj  = aij->j;
2103 
2104     /*
2105         Determine the number of non-zeros in the diagonal and off-diagonal
2106         portions of the matrix in order to do correct preallocation
2107     */
2108 
2109     /* first get start and end of "diagonal" columns */
2110     if (csize == PETSC_DECIDE) {
2111       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2112       if (mglobal == n) { /* square matrix */
2113 	nlocal = m;
2114       } else {
2115         nlocal = n/size + ((n % size) > rank);
2116       }
2117     } else {
2118       nlocal = csize;
2119     }
2120     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2121     rstart = rend - nlocal;
2122     if (rank == size - 1 && rend != n) {
2123       SETERRQ(1,"Local column sizes do not add up to total number of columns");
2124     }
2125 
2126     /* next, compute all the lengths */
2127     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2128     olens = dlens + m;
2129     for (i=0; i<m; i++) {
2130       jend = ii[i+1] - ii[i];
2131       olen = 0;
2132       dlen = 0;
2133       for (j=0; j<jend; j++) {
2134         if (*jj < rstart || *jj >= rend) olen++;
2135         else dlen++;
2136         jj++;
2137       }
2138       olens[i] = olen;
2139       dlens[i] = dlen;
2140     }
2141     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2142     ierr = PetscFree(dlens);CHKERRQ(ierr);
2143   } else {
2144     int ml,nl;
2145 
2146     M = *newmat;
2147     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2148     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2149     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2150     /*
2151          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2152        rather than the slower MatSetValues().
2153     */
2154     M->was_assembled = PETSC_TRUE;
2155     M->assembled     = PETSC_FALSE;
2156   }
2157   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2158   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2159   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2160   ii  = aij->i;
2161   jj  = aij->j;
2162   aa  = aij->a;
2163   for (i=0; i<m; i++) {
2164     row   = rstart + i;
2165     nz    = ii[i+1] - ii[i];
2166     cwork = jj;     jj += nz;
2167     vwork = aa;     aa += nz;
2168     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2169   }
2170 
2171   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2172   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2173   *newmat = M;
2174 
2175   /* save submatrix used in processor for next request */
2176   if (call ==  MAT_INITIAL_MATRIX) {
2177     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2178     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2179   }
2180 
2181   PetscFunctionReturn(0);
2182 }
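
/*
   Usage sketch (assuming the generic MatGetSubMatrix() has the same calling sequence
   as the routine above; isrow/iscol are index sets built by the caller, e.g. with
   ISCreateStride() or ISCreateGeneral()):

     IS  isrow,iscol;
     Mat S;
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     [later the same storage can be refilled for new values of A]
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
*/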
2183 
2184 #undef __FUNCT__
2185 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2186 /*@C
2187    MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix in AIJ format
2188    (the default parallel PETSc format).  For good matrix assembly performance
2189    the user should preallocate the matrix storage by setting the parameters
2190    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2191    performance can be increased by more than a factor of 50.
2192 
2193    Collective on MPI_Comm
2194 
2195    Input Parameters:
2196 +  A - the matrix
2197 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2198            (same value is used for all local rows)
2199 .  d_nnz - array containing the number of nonzeros in the various rows of the
2200            DIAGONAL portion of the local submatrix (possibly different for each row)
2201            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2202            The size of this array is equal to the number of local rows, i.e., 'm'.
2203            You must leave room for the diagonal entry even if it is zero.
2204 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2205            submatrix (same value is used for all local rows).
2206 -  o_nnz - array containing the number of nonzeros in the various rows of the
2207            OFF-DIAGONAL portion of the local submatrix (possibly different for
2208            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2209            structure. The size of this array is equal to the number
2210            of local rows, i.e., 'm'.
2211 
2212    The AIJ format (also called the Yale sparse matrix format or
2213    compressed row storage), is fully compatible with standard Fortran 77
2214    storage.  That is, the stored row and column indices can begin at
2215    either one (as in Fortran) or zero.  See the users manual for details.
2216 
2217    The user MUST specify either the local or global matrix dimensions
2218    (possibly both).
2219 
2220    The parallel matrix is partitioned such that the first m0 rows belong to
2221    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2222    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2223 
2224    The DIAGONAL portion of the local submatrix of a processor can be defined
2225    as the submatrix which is obtained by extracting the part corresponding
2226    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2227    first row that belongs to the processor, and r2 is the last row belonging
2228    to this processor. This is a square mxm matrix. The remaining portion
2229    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2230 
2231    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2232 
2233    By default, this format uses inodes (identical nodes) when possible.
2234    We search for consecutive rows with the same nonzero structure, thereby
2235    reusing matrix information to achieve increased efficiency.
2236 
2237    Options Database Keys:
2238 +  -mat_aij_no_inode  - Do not use inodes
2239 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2240 -  -mat_aij_oneindex - Internally use indexing starting at 1
2241         rather than 0.  Note that when calling MatSetValues(),
2242         the user still MUST index entries starting at 0!
2243 
2244    Example usage:
2245 
2246    Consider the following 8x8 matrix with 34 non-zero values, that is
2247    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2248    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2249    as follows:
2250 
2251 .vb
2252             1  2  0  |  0  3  0  |  0  4
2253     Proc0   0  5  6  |  7  0  0  |  8  0
2254             9  0 10  | 11  0  0  | 12  0
2255     -------------------------------------
2256            13  0 14  | 15 16 17  |  0  0
2257     Proc1   0 18  0  | 19 20 21  |  0  0
2258             0  0  0  | 22 23  0  | 24  0
2259     -------------------------------------
2260     Proc2  25 26 27  |  0  0 28  | 29  0
2261            30  0  0  | 31 32 33  |  0 34
2262 .ve
2263 
2264    This can be represented as a collection of submatrices as:
2265 
2266 .vb
2267       A B C
2268       D E F
2269       G H I
2270 .ve
2271 
2272    Where the submatrices A,B,C are owned by proc0, D,E,F are
2273    owned by proc1, G,H,I are owned by proc2.
2274 
2275    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2276    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2277    The 'M','N' parameters are 8,8, and have the same values on all procs.
2278 
2279    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2280    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2281    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2282    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2283    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2284    matrix and [DF] as another SeqAIJ matrix.
2285 
2286    When d_nz, o_nz parameters are specified, d_nz storage elements are
2287    allocated for every row of the local diagonal submatrix, and o_nz
2288    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2289    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2290    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2291    In this case, the values of d_nz,o_nz are:
2292 .vb
2293      proc0 : d_nz = 2, o_nz = 2
2294      proc1 : d_nz = 3, o_nz = 2
2295      proc2 : d_nz = 1, o_nz = 4
2296 .ve
2297    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2298    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2299    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2300    34 values.
2301 
2302    When d_nnz, o_nnz parameters are specified, the storage is specified
2303    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2304    In the above case the values for d_nnz,o_nnz are:
2305 .vb
2306      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2307      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2308      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2309 .ve
2310    Here the space allocated is the sum of all the above values, i.e., 34, and
2311    hence the preallocation is perfect.
2312 
2313    Level: intermediate
2314 
2315 .keywords: matrix, aij, compressed row, sparse, parallel
2316 
2317 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2318 @*/
2319 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2320 {
2321   Mat_MPIAIJ   *b;
2322   int          ierr,i;
2323   PetscTruth   flg2;
2324 
2325   PetscFunctionBegin;
2326   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2327   if (!flg2) PetscFunctionReturn(0);
2328   B->preallocated = PETSC_TRUE;
2329   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2330   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2331   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2332   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2333   if (d_nnz) {
2334     for (i=0; i<B->m; i++) {
2335       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2336     }
2337   }
2338   if (o_nnz) {
2339     for (i=0; i<B->m; i++) {
2340       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2341     }
2342   }
2343   b = (Mat_MPIAIJ*)B->data;
2344 
2345   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2346   PetscLogObjectParent(B,b->A);
2347   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2348   PetscLogObjectParent(B,b->B);
2349 
2350   PetscFunctionReturn(0);
2351 }
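
/*
   Usage sketch: create an MPIAIJ matrix through the generic interface and preallocate
   it with the routine above (the nonzero counts here are illustrative only).

     Mat A;
     ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/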
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatCreateMPIAIJ"
2355 /*@C
2356    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2357    (the default parallel PETSc format).  For good matrix assembly performance
2358    the user should preallocate the matrix storage by setting the parameters
2359    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2360    performance can be increased by more than a factor of 50.
2361 
2362    Collective on MPI_Comm
2363 
2364    Input Parameters:
2365 +  comm - MPI communicator
2366 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2367            This value should be the same as the local size used in creating the
2368            y vector for the matrix-vector product y = Ax.
2369 .  n - This value should be the same as the local size used in creating the
2370        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2371        calculated if N is given) For square matrices n is almost always m.
2372 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2373 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2374 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2375            (same value is used for all local rows)
2376 .  d_nnz - array containing the number of nonzeros in the various rows of the
2377            DIAGONAL portion of the local submatrix (possibly different for each row)
2378            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2379            The size of this array is equal to the number of local rows, i.e., 'm'.
2380            You must leave room for the diagonal entry even if it is zero.
2381 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2382            submatrix (same value is used for all local rows).
2383 -  o_nnz - array containing the number of nonzeros in the various rows of the
2384            OFF-DIAGONAL portion of the local submatrix (possibly different for
2385            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2386            structure. The size of this array is equal to the number
2387            of local rows, i.e., 'm'.
2388 
2389    Output Parameter:
2390 .  A - the matrix
2391 
2392    Notes:
2393    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2394    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2395    storage requirements for this matrix.
2396 
2397    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2398    processor then it must be used on all processors that share the object for
2399    that argument.
2400 
2401    The AIJ format (also called the Yale sparse matrix format or
2402    compressed row storage), is fully compatible with standard Fortran 77
2403    storage.  That is, the stored row and column indices can begin at
2404    either one (as in Fortran) or zero.  See the users manual for details.
2405 
2406    The user MUST specify either the local or global matrix dimensions
2407    (possibly both).
2408 
2409    The parallel matrix is partitioned such that the first m0 rows belong to
2410    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2411    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2412 
2413    The DIAGONAL portion of the local submatrix of a processor can be defined
2414    as the submatrix which is obtained by extracting the part corresponding
2415    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2416    first row that belongs to the processor, and r2 is the last row belonging
2417    to this processor. This is a square mxm matrix. The remaining portion
2418    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2419 
2420    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2421 
2422    By default, this format uses inodes (identical nodes) when possible.
2423    We search for consecutive rows with the same nonzero structure, thereby
2424    reusing matrix information to achieve increased efficiency.
2425 
2426    Options Database Keys:
2427 +  -mat_aij_no_inode  - Do not use inodes
2428 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2429 -  -mat_aij_oneindex - Internally use indexing starting at 1
2430         rather than 0.  Note that when calling MatSetValues(),
2431         the user still MUST index entries starting at 0!
2432 
2433 
2434    Example usage:
2435 
2436    Consider the following 8x8 matrix with 34 non-zero values, that is
2437    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2438    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2439    as follows:
2440 
2441 .vb
2442             1  2  0  |  0  3  0  |  0  4
2443     Proc0   0  5  6  |  7  0  0  |  8  0
2444             9  0 10  | 11  0  0  | 12  0
2445     -------------------------------------
2446            13  0 14  | 15 16 17  |  0  0
2447     Proc1   0 18  0  | 19 20 21  |  0  0
2448             0  0  0  | 22 23  0  | 24  0
2449     -------------------------------------
2450     Proc2  25 26 27  |  0  0 28  | 29  0
2451            30  0  0  | 31 32 33  |  0 34
2452 .ve
2453 
2454    This can be represented as a collection of submatrices as:
2455 
2456 .vb
2457       A B C
2458       D E F
2459       G H I
2460 .ve
2461 
2462    Where the submatrices A,B,C are owned by proc0, D,E,F are
2463    owned by proc1, G,H,I are owned by proc2.
2464 
2465    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2466    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2467    The 'M','N' parameters are 8,8, and have the same values on all procs.
2468 
2469    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2470    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2471    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2472    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2473    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
2474    matrix and [DF] as another SeqAIJ matrix.
2475 
2476    When d_nz, o_nz parameters are specified, d_nz storage elements are
2477    allocated for every row of the local diagonal submatrix, and o_nz
2478    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2479    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
2480    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
2481    In this case, the values of d_nz,o_nz are:
2482 .vb
2483      proc0 : d_nz = 2, o_nz = 2
2484      proc1 : d_nz = 3, o_nz = 2
2485      proc2 : d_nz = 1, o_nz = 4
2486 .ve
2487    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2488    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2489    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2490    34 values.
2491 
2492    When d_nnz, o_nnz parameters are specified, the storage is specified
2493    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2494    In the above case the values for d_nnz,o_nnz are:
2495 .vb
2496      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2497      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2498      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2499 .ve
2500    Here the space allocated is the sum of all the above values, i.e., 34, and
2501    hence the preallocation is perfect.
2502 
2503    Level: intermediate
2504 
2505 .keywords: matrix, aij, compressed row, sparse, parallel
2506 
2507 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2508 @*/
2509 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2510 {
2511   int ierr,size;
2512 
2513   PetscFunctionBegin;
2514   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2515   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2516   if (size > 1) {
2517     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2518     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2519   } else {
2520     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2521     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2522   }
2523   PetscFunctionReturn(0);
2524 }
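
/*
   Calling sequence for the 8x8 example in the manual page above (PETSC_NULL is
   passed for the d_nnz/o_nnz arrays since only d_nz/o_nz are used):

     proc0:  ierr = MatCreateMPIAIJ(comm,3,3,8,8,2,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     proc1:  ierr = MatCreateMPIAIJ(comm,3,3,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     proc2:  ierr = MatCreateMPIAIJ(comm,2,2,8,8,1,PETSC_NULL,4,PETSC_NULL,&A);CHKERRQ(ierr);

   after which each process inserts its rows with MatSetValues() and the matrix is
   completed with MatAssemblyBegin()/MatAssemblyEnd().
*/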
2525 
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2528 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2529 {
2530   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2531   PetscFunctionBegin;
2532   *Ad     = a->A;
2533   *Ao     = a->B;
2534   *colmap = a->garray;
2535   PetscFunctionReturn(0);
2536 }
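
/*
   Usage sketch: access the two sequential blocks of an MPIAIJ matrix.  Ad is the
   local diagonal block, Ao the local off-diagonal block, and colmap (the garray)
   maps local column numbers of Ao to global column numbers.

     Mat Ad,Ao;
     int *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/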
2537 
2538 #undef __FUNCT__
2539 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2540 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2541 {
2542   int        ierr,i;
2543   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2544 
2545   PetscFunctionBegin;
2546   if (coloring->ctype == IS_COLORING_LOCAL) {
2547     ISColoringValue *allcolors,*colors;
2548     ISColoring      ocoloring;
2549 
2550     /* set coloring for diagonal portion */
2551     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2552 
2553     /* set coloring for off-diagonal portion */
2554     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2555     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2556     for (i=0; i<a->B->n; i++) {
2557       colors[i] = allcolors[a->garray[i]];
2558     }
2559     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2560     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2561     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2562     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2563   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2564     ISColoringValue *colors;
2565     int             *larray;
2566     ISColoring      ocoloring;
2567 
2568     /* set coloring for diagonal portion */
2569     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2570     for (i=0; i<a->A->n; i++) {
2571       larray[i] = i + a->cstart;
2572     }
2573     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2574     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2575     for (i=0; i<a->A->n; i++) {
2576       colors[i] = coloring->colors[larray[i]];
2577     }
2578     ierr = PetscFree(larray);CHKERRQ(ierr);
2579     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2580     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2581     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2582 
2583     /* set coloring for off-diagonal portion */
2584     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2585     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2586     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2587     for (i=0; i<a->B->n; i++) {
2588       colors[i] = coloring->colors[larray[i]];
2589     }
2590     ierr = PetscFree(larray);CHKERRQ(ierr);
2591     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2592     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2593     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2594   } else {
2595     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2596   }
2597 
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 #undef __FUNCT__
2602 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2603 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2604 {
2605   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2606   int        ierr;
2607 
2608   PetscFunctionBegin;
2609   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2610   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2611   PetscFunctionReturn(0);
2612 }
2613 
2614 #undef __FUNCT__
2615 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2616 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2617 {
2618   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2619   int        ierr;
2620 
2621   PetscFunctionBegin;
2622   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2623   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2624   PetscFunctionReturn(0);
2625 }
2626 
2627 #undef __FUNCT__
2628 #define __FUNCT__ "MatMerge"
2629 /*@C
2630       MatMerge - Creates a single large PETSc matrix by concatenating the sequential
2631                  matrices from each processor
2632 
2633     Collective on MPI_Comm
2634 
2635    Input Parameters:
2636 +    comm - the communicator the parallel matrix will live on
2637 -    inmat - the input sequential matrix (one per process)
2638 
2639    Output Parameter:
2640 .    outmat - the parallel matrix generated
2641 
2642     Level: advanced
2643 
2644    Notes: The number of columns of the input matrix on EACH of the separate processes
2645       MUST be the same. The input matrix inmat is destroyed by this routine.
2646 
2647 @*/
2648 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2649 {
2650   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2651   PetscScalar *values;
2652   PetscMap    columnmap,rowmap;
2653 
2654   PetscFunctionBegin;
2655 
2656   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2657 
2658   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2659   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2660   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2661   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2662   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2663   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2664 
2665   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2666   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2667   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2668   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2669   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2670 
2671   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2672   for (i=0;i<m;i++) {
2673     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2674     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2675     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2676   }
2677   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2678   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2679 
2680   for (i=0;i<m;i++) {
2681     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2682     I    = i + rstart;
2683     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2684     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2685   }
2686   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2687   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2688   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2689 
2690   PetscFunctionReturn(0);
2691 }
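
/*
   Usage sketch (an illustration): every process builds a sequential matrix with the
   SAME number of columns and MatMerge() stacks them into one parallel matrix; note
   that the input matrix is destroyed by MatMerge().

     Mat seq,par;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,ncols,0,PETSC_NULL,&seq);CHKERRQ(ierr);
     [fill and assemble seq]
     ierr = MatMerge(comm,seq,&par);CHKERRQ(ierr);
*/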
2692 
2693 #undef __FUNCT__
2694 #define __FUNCT__ "MatFileSplit"
2695 int MatFileSplit(Mat A,char *outfile)
2696 {
2697   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2698   PetscViewer out;
2699   char        *name;
2700   Mat         B;
2701   PetscScalar *values;
2702 
2703   PetscFunctionBegin;
2704 
2705   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2706   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2707   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2708   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2709   for (i=0;i<m;i++) {
2710     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2711     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2712     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2713   }
2714   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2715   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2716 
2717   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2718   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2719   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* leave room for the ".<rank>" suffix */
2720   sprintf(name,"%s.%d",outfile,rank);
2721   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2722   ierr = PetscFree(name);CHKERRQ(ierr);
2723   ierr = MatView(B,out);CHKERRQ(ierr);
2724   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2725   ierr = MatDestroy(B);CHKERRQ(ierr);
2726   PetscFunctionReturn(0);
2727 }
2728