/*$Id: mpiaij.c,v 1.344 2001/08/10 03:30:48 bsmith Exp $*/

#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/vec/vecimpl.h"
#include "src/inline/spops.h"

EXTERN int MatSetUpMultiply_MPIAIJ(Mat);
EXTERN int DisAssemble_MPIAIJ(Mat);
EXTERN int MatSetValues_SeqAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqAIJ(Mat);

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table cost; without it, it is not scalable (each
  processor holds an order-N integer array) but access is fast.  A usage
  sketch follows the routine.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIAIJ_Private"
int CreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        n = aij->B->n,i,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++){
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((mat->N+1)*sizeof(int),&aij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,mat->N*sizeof(int));
  ierr = PetscMemzero(aij->colmap,mat->N*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
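
/*
   Usage sketch (illustrative only): translating a global column number
   gcol into its local column number in the off-diagonal block B, as done
   in MatSetValues_MPIAIJ() below.  Both key and value are stored shifted
   by one so that a return of 0 can signal "not present".

     int lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   A negative lcol means gcol has no entry in the off-diagonal part.
*/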

#define CHUNKSIZE   15
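
/*
   The two macros below are hand-inlined versions of MatSetValues_SeqAIJ()
   for the diagonal (A) and off-diagonal (B) blocks.  Each one
     1) binary-searches the sorted row for the column, dropping to a linear
        scan once five or fewer candidates remain,
     2) adds or overwrites the value in place when the column is found
        (jumping to the a_noinsert/b_noinsert label),
     3) otherwise shifts the tail of the row up and inserts the new entry,
        first enlarging the storage by CHUNKSIZE entries if the row is full.
*/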
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
 \
    rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
    rmax = aimax[row]; nrow = ailen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto a_noinsert; \
      } \
    } \
    if (nonew == 1) goto a_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = ai[am] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(am+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} \
      for (ii=row+1; ii<am+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - ai[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+ai[row]+shift+nrow+CHUNKSIZE,aj+ai[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+ai[row]+shift+nrow+CHUNKSIZE,aa+ai[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(a->a);CHKERRQ(ierr); \
      if (!a->singlemalloc) { \
        ierr = PetscFree(a->i);CHKERRQ(ierr); \
        ierr = PetscFree(a->j);CHKERRQ(ierr); \
      } \
      aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \
      a->singlemalloc = PETSC_TRUE; \
 \
      rp   = aj + ai[row] + shift; ap = aa + ai[row] + shift; \
      rmax = aimax[row] = aimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      a->maxnz += CHUNKSIZE; \
      a->reallocs++; \
    } \
    N = nrow++ - 1; a->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    a_noinsert: ; \
    ailen[row] = nrow; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
{ \
 \
    rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
    rmax = bimax[row]; nrow = bilen[row];  \
    col1 = col - shift; \
     \
    low = 0; high = nrow; \
    while (high-low > 5) { \
      t = (low+high)/2; \
      if (rp[t] > col1) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > col1) break; \
      if (rp[_i] == col1) { \
        if (addv == ADD_VALUES) ap[_i] += value; \
        else                    ap[_i]  = value; \
        goto b_noinsert; \
      } \
    } \
    if (nonew == 1) goto b_noinsert; \
    else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
    if (nrow >= rmax) { \
      /* there is no extra room in row, therefore enlarge */ \
      int    new_nz = bi[bm] + CHUNKSIZE,len,*new_i,*new_j; \
      PetscScalar *new_a; \
 \
      if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
      /* malloc new storage space */ \
      len     = new_nz*(sizeof(int)+sizeof(PetscScalar))+(bm+1)*sizeof(int); \
      ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
      new_j   = (int*)(new_a + new_nz); \
      new_i   = new_j + new_nz; \
 \
      /* copy over old data into new slots */ \
      for (ii=0; ii<row+1; ii++) {new_i[ii] = bi[ii];} \
      for (ii=row+1; ii<bm+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
      ierr = PetscMemcpy(new_j,bj,(bi[row]+nrow+shift)*sizeof(int));CHKERRQ(ierr); \
      len  = (new_nz - CHUNKSIZE - bi[row] - nrow - shift); \
      ierr = PetscMemcpy(new_j+bi[row]+shift+nrow+CHUNKSIZE,bj+bi[row]+shift+nrow, \
                         len*sizeof(int));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a,ba,(bi[row]+nrow+shift)*sizeof(PetscScalar));CHKERRQ(ierr); \
      ierr = PetscMemcpy(new_a+bi[row]+shift+nrow+CHUNKSIZE,ba+bi[row]+shift+nrow, \
                         len*sizeof(PetscScalar));CHKERRQ(ierr); \
      /* free up old matrix storage */ \
 \
      ierr = PetscFree(b->a);CHKERRQ(ierr); \
      if (!b->singlemalloc) { \
        ierr = PetscFree(b->i);CHKERRQ(ierr); \
        ierr = PetscFree(b->j);CHKERRQ(ierr); \
      } \
      ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \
      b->singlemalloc = PETSC_TRUE; \
 \
      rp   = bj + bi[row] + shift; ap = ba + bi[row] + shift; \
      rmax = bimax[row] = bimax[row] + CHUNKSIZE; \
      PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + sizeof(PetscScalar))); \
      b->maxnz += CHUNKSIZE; \
      b->reallocs++; \
    } \
    N = nrow++ - 1; b->nz++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ap[ii+1] = ap[ii]; \
    } \
    rp[_i] = col1; \
    ap[_i] = value; \
    b_noinsert: ; \
    bilen[row] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
int MatSetValues_MPIAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar  value;
  int          ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int          cstart = aij->cstart,cend = aij->cend,row,col;
  PetscTruth   roworiented = aij->roworiented;

  /* Some variables required by the macros above */
  Mat          A = aij->A;
  Mat_SeqAIJ   *a = (Mat_SeqAIJ*)A->data;
  int          *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  PetscScalar  *aa = a->a;
  PetscTruth   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
  Mat          B = aij->B;
  Mat_SeqAIJ   *b = (Mat_SeqAIJ*)B->data;
  int          *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
  PetscScalar  *ba = b->a;

  int          *rp,ii,nrow,_i,rmax,N,col1,low,high,t;
  int          nonew = a->nonew,shift = a->indexshift;
  PetscScalar  *ap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart && in[j] < cend){
          col = in[j] - cstart;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
              ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B = aij->B;
              b = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
              ba = b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0) continue;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqAIJ(aij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!aij->donotstash) {
        if (roworiented) {
          if (ignorezeroentries && v[i*n] == 0.0) continue;
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          if (ignorezeroentries && v[i] == 0.0) continue;
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
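
/*
   Caller-side sketch (standard PETSc usage, illustrative only): entries
   whose global row lies in [rstart,rend) go directly into A or B above;
   rows owned by other processors are stashed and communicated during
   assembly.

     PetscScalar v   = 1.0;
     int         row = 0,col = 0;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/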

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
int MatGetValues_MPIAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr,i,j,rstart = aij->rstart,rend = aij->rend;
  int        cstart = aij->cstart,cend = aij->cend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= cstart && idxn[j] < cend){
          col = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
int MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (aij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
int MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ*)mat->data;
  int         i,j,rstart,ncols,n,ierr,flg;
  int         *row,*col,other_disassembled;
  PetscScalar *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_SUPERLUDIST) || defined(PETSC_HAVE_SPOOLES)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!aij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }

  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  if (aij->rowvalues) {
    ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
    aij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_SUPERLUDIST)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_superlu_dist",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSuperLU_DIST_MPIAIJ(mat);CHKERRQ(ierr); }
#endif

#if defined(PETSC_HAVE_SPOOLES)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_aij_spooles",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseSpooles_MPIAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
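
/*
   Assembly, in summary: MatAssemblyBegin() ships the stashed off-processor
   entries; MatAssemblyEnd() drains them row by row into
   MatSetValues_MPIAIJ(), assembles the diagonal block A, checks (via an
   allreduce) whether any processor disassembled and if so disassembles too,
   builds the column map and scatter context on the first final assembly
   (MatSetUpMultiply_MPIAIJ), and finally assembles the off-diagonal block B.
*/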

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIAIJ"
int MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
int MatZeroRows_MPIAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,rstart=l->rstart;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length */
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0] = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory, thus calling lots of mallocs and
     slowing things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
  }
  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
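
/*
   Usage sketch (standard API, hypothetical row numbers): zeroing two
   global rows and placing 1.0 on their diagonal, as is common for
   Dirichlet boundary conditions.

     IS          is;
     int         rows[2] = {0,5};
     PetscScalar one     = 1.0;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,2,rows,&is);CHKERRQ(ierr);
     ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);
     ierr = ISDestroy(is);CHKERRQ(ierr);
*/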

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIAIJ"
int MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%d) and xx (%d)",A->n,nt);
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
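
/*
   The multiply above computes, on each processor,
        y_local = A*x_local + B*x_ghost
   where A holds the diagonal block, B the off-diagonal block (with
   compressed column numbering via garray), and x_ghost = lvec is gathered
   from the other processors; the scatter is started before the local
   product so communication overlaps computation.
*/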

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIAIJ"
int MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIAIJ"
int MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for this
     implementation but may not always be true */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
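
/*
   For the transpose products (here and in MatMultTransposeAdd_MPIAIJ()
   below) the roles reverse: each processor computes
        lvec = B^T * x_local,   y_local = A^T * x_local,
   and then scatter-adds lvec back to its owning processors
   (SCATTER_REVERSE with ADD_VALUES), since B^T contributes to rows
   owned elsewhere.
*/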

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
int MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this
     implementation but may not always be true */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIAIJ"
int MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  int        ierr;
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (a->rstart != a->cstart || a->rend != a->cend) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  }
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIAIJ"
int MatScale_MPIAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
int MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  int        ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d, Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(aij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
#else
  if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
#endif
  if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
  if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
  if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
  if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
  ierr = PetscFree(aij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

extern int MatMPIAIJFactorInfo_SuperLu(Mat,PetscViewer);
extern int MatFactorInfo_Spooles(Mat,PetscViewer);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
int MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *A = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ        *B = (Mat_SeqAIJ*)aij->B->data;
  int               nz,fd,ierr,header[4],rank,size,*row_lengths,*range,rlen,i,tag = ((PetscObject)viewer)->tag;
  int               nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
  PetscScalar       *column_values;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  if (rank == 0) {
    header[0] = MAT_FILE_COOKIE;
    header[1] = mat->M;
    header[2] = mat->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,1);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen = mat->m;
    ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPI_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
    rlen = mat->m;
  }

  /* load up the local row counts */
  ierr = PetscMalloc((rlen+1)*sizeof(int),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->m; i++) {
    row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }

  /* store the row lengths to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = range[i+1] - range[i];
      ierr = MPI_Recv(row_lengths,rlen,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(row_lengths,mat->m,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPI_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
  ierr = PetscMalloc((nzmax+1)*sizeof(int),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( (col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_indices[cnt++] = A->j[k] + cstart;
    }
    for (; j<B->i[i+1]; j++) {
      column_indices[cnt++] = garray[B->j[j]];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_indices,rnz,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,nz,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->m; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ( garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      column_values[cnt++] = A->a[k];
    }
    for (; j<B->i[i+1]; j++) {
      column_values[cnt++] = B->a[j];
    }
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %d nz = %d",cnt,A->nz+B->nz);

  /* store the column values to the file */
  if (rank == 0) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,1);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = MPI_Recv(&rnz,1,MPI_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %d nzmax = %d",rnz,nzmax);
      ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,1);CHKERRQ(ierr);
    }
  } else {
    ierr = MPI_Send(&nz,1,MPI_INT,0,tag,mat->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
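
/*
   File layout written above: a header of four PETSC_INT values
     {MAT_FILE_COOKIE, M, N, total number of nonzeros},
   followed by the row lengths of every processor in rank order, then all
   global column indices (PETSC_INT), then all values (PETSC_SCALAR);
   processor 0 gathers each piece from the others with point-to-point
   messages and does all of the writing.
*/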

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
int MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ        *C = (Mat_SeqAIJ*)aij->A->data;
  int               ierr,shift = C->indexshift,rank = aij->rank,size = aij->size;
  PetscTruth        isdraw,isascii,flg,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscOptionsHasName(PETSC_NULL,"-mat_aij_no_inode",&flg);CHKERRQ(ierr);
      if (flg) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, not using I-node routines\n",
               rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d mem %d, using I-node routines\n",
               rank,mat->m,(int)info.nz_used,(int)info.nz_allocated,(int)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_SUPERLUDIST) && !defined(PETSC_USE_SINGLE)
      ierr = MatMPIAIJFactorInfo_SuperLu(mat,viewer);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SPOOLES) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
      ierr = MatFactorInfo_Spooles(mat,viewer);CHKERRQ(ierr);
#endif

      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqAIJ  *Aloc;
    int         M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
    PetscScalar *a;

    if (!rank) {
      ierr = MatCreateMPIAIJ(mat->comm,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIAIJ(mat->comm,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row = aij->rstart;
    for (i=0; i<ai[m]+shift; i++) {aj[i] += aij->cstart + shift;}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]+shift; i++) {aj[i] -= aij->cstart + shift;}

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = aij->rstart;
    ierr = PetscMalloc((ai[m]+1)*sizeof(int),&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]+shift; i++) {cols[i] = aij->garray[aj[i]+shift];}
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ"
int MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  if (isascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRelax_MPIAIJ"
int MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  int          ierr;
  Vec          bb1;
  PetscScalar  mone=-1.0;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);

  ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
      ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
    }
  } else {
    SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
  }

  ierr = VecDestroy(bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
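
/*
   Each outer iteration above gathers the ghost values of the current
   iterate, forms the updated right-hand side
        bb1 = bb - B*x_ghost
   (the gathered values are negated with VecScale and folded in with
   multadd), and then runs the requested local SOR sweeps on the diagonal
   block; true parallel SOR across processor boundaries is not supported.
*/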

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIAIJ"
int MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data;
  Mat        A = mat->A,B = mat->B;
  int        ierr;
  PetscReal  isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  info->rows_global       = (double)matin->M;
  info->columns_global    = (double)matin->N;
  info->rows_local        = (double)matin->m;
  info->columns_local     = (double)matin->n;

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIAIJ"
int MatSetOption_MPIAIJ(Mat A,MatOption op)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
  int        ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_DO_NOT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"MatSetOption_MPIAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
int MatGetRow_MPIAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ   *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          i,ierr,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
  int          nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
  int          *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    int        max = 1,tmp;
    for (i=0; i<matin->m; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max);
  }

  if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
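
/*
   Usage sketch (standard API): callers obtain one locally owned global
   row at a time; column indices come back in increasing global order
   (B-part columns below cstart, then the A part, then the rest of B).

     int         nz,*cols,row = rstart;
     PetscScalar *vals;
     ierr = MatGetRow(matin,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(matin,row,&nz,&cols,&vals);CHKERRQ(ierr);
*/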

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIAIJ"
int MatRestoreRow_MPIAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  if (aij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
int MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ   *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ   *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  int          ierr,i,j,cstart = aij->cstart,shift = amat->indexshift;
  PetscReal    sum = 0.0;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp,*tmp2;
      int       *jj,*garray = aij->garray;
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      *norm = 0.0;
      v = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++ + shift] += PetscAbsScalar(*v);  v++;
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++ + shift]] += PetscAbsScalar(*v); v++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->m; j++) {
        v = amat->a + amat->i[j] + shift;
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j] + shift;
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for two norm");
    }
  }
  PetscFunctionReturn(0);
}
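
/*
   The norms above are, with a_ij ranging over both local blocks:
     NORM_FROBENIUS: sqrt(sum |a_ij|^2), partial sums combined by MPI_SUM;
     NORM_1:         max_j sum_i |a_ij|, column sums reduced by MPI_SUM
                     into a length-N array and then maximized;
     NORM_INFINITY:  max_i sum_j |a_ij|, local row maxima reduced by MPI_MAX.
*/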

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
int MatTranspose_MPIAIJ(Mat A,Mat *matout)
{
  Mat_MPIAIJ   *a = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ   *Aloc = (Mat_SeqAIJ*)a->A->data;
  int          ierr,shift = Aloc->indexshift;
  int          M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
  Mat          B;
  PetscScalar  *array;

  PetscFunctionBegin;
  if (!matout && M != N) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  }

  ierr = MatCreateMPIAIJ(A->comm,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqAIJ*)a->A->data;
  m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row = a->rstart;
  for (i=0; i<ai[m]+shift; i++) {aj[i] += a->cstart + shift;}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
  }
  aj = Aloc->j;
  for (i=0; i<ai[m]+shift; i++) {aj[i] -= a->cstart + shift;}

  /* copy over the B part */
  Aloc = (Mat_SeqAIJ*)a->B->data;
  m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
  row  = a->rstart;
  ierr = PetscMalloc((1+ai[m]-shift)*sizeof(int),&cols);CHKERRQ(ierr);
  ct   = cols;
  for (i=0; i<ai[m]+shift; i++) {cols[i] = a->garray[aj[i]+shift];}
  for (i=0; i<m; i++) {
    ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
  }
  ierr = PetscFree(ct);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1380 int MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1381 {
1382   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1383   Mat        a = aij->A,b = aij->B;
1384   int        ierr,s1,s2,s3;
1385 
1386   PetscFunctionBegin;
1387   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1388   if (rr) {
1389     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1390     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1391     /* Overlap communication with computation. */
1392     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1393   }
1394   if (ll) {
1395     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1396     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1397     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1398   }
1399   /* scale the diagonal block */
1400   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1401 
1402   if (rr) {
1403     /* Do a scatter end and then right scale the off-diagonal block */
1404     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1405     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1406   }
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1414 int MatPrintHelp_MPIAIJ(Mat A)
1415 {
1416   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1417   int        ierr;
1418 
1419   PetscFunctionBegin;
1420   if (!a->rank) {
1421     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1422   }
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatGetBlockSize_MPIAIJ"
1428 int MatGetBlockSize_MPIAIJ(Mat A,int *bs)
1429 {
1430   PetscFunctionBegin;
1431   *bs = 1;
1432   PetscFunctionReturn(0);
1433 }
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1436 int MatSetUnfactored_MPIAIJ(Mat A)
1437 {
1438   Mat_MPIAIJ *a   = (Mat_MPIAIJ*)A->data;
1439   int        ierr;
1440 
1441   PetscFunctionBegin;
1442   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatEqual_MPIAIJ"
1448 int MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1449 {
1450   Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1451   Mat        a,b,c,d;
1452   PetscTruth flg;
1453   int        ierr;
1454 
1455   PetscFunctionBegin;
1456   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1457   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1458   a = matA->A; b = matA->B;
1459   c = matB->A; d = matB->B;
1460 
1461   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1462   if (flg == PETSC_TRUE) {
1463     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1464   }
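       /* the matrices are equal only if both the diagonal and off-diagonal parts
          match on every process, hence the logical AND reduction below */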
1465   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatCopy_MPIAIJ"
1471 int MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1472 {
1473   int        ierr;
1474   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
1475   Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
1476   PetscTruth flg;
1477 
1478   PetscFunctionBegin;
1479   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg);CHKERRQ(ierr);
1480   if (str != SAME_NONZERO_PATTERN || !flg) {
1481     /* because of the column compression in the off-processor part of the matrix a->B,
1482        the number of columns in a->B and b->B may be different, hence we cannot call
1483        MatCopy() directly on the two parts. If need be, we could provide a copy more
1484        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1485        then copying the submatrices */
1486     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1487   } else {
1488     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1489     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1490   }
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1496 int MatSetUpPreallocation_MPIAIJ(Mat A)
1497 {
1498   int        ierr;
1499 
1500   PetscFunctionBegin;
1501   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 EXTERN int MatDuplicate_MPIAIJ(Mat,MatDuplicateOption,Mat *);
1506 EXTERN int MatIncreaseOverlap_MPIAIJ(Mat,int,IS *,int);
1507 EXTERN int MatFDColoringCreate_MPIAIJ(Mat,ISColoring,MatFDColoring);
1508 EXTERN int MatGetSubMatrices_MPIAIJ (Mat,int,IS *,IS *,MatReuse,Mat **);
1509 EXTERN int MatGetSubMatrix_MPIAIJ (Mat,IS,IS,int,MatReuse,Mat *);
1510 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1511 EXTERN int MatLUFactorSymbolic_MPIAIJ_TFS(Mat,IS,IS,MatLUInfo*,Mat*);
1512 #endif
1513 
1514 #include "petscblaslapack.h"
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatAXPY_MPIAIJ"
1518 int MatAXPY_MPIAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str)
1519 {
1520   int        ierr,one = 1;
1521   Mat_MPIAIJ *xx  = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1522   Mat_SeqAIJ *x,*y;
1523 
1524   PetscFunctionBegin;
1525   if (str == SAME_NONZERO_PATTERN) {
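         /* with identical nonzero patterns Y += a*X reduces to a dense axpy over
            the stored values of the diagonal (A) and off-diagonal (B) parts */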
1526     x  = (Mat_SeqAIJ *)xx->A->data;
1527     y  = (Mat_SeqAIJ *)yy->A->data;
1528     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1529     x  = (Mat_SeqAIJ *)xx->B->data;
1530     y  = (Mat_SeqAIJ *)yy->B->data;
1531     BLaxpy_(&x->nz,a,x->a,&one,y->a,&one);
1532   } else {
1533     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1534   }
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 /* -------------------------------------------------------------------*/
1539 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1540        MatGetRow_MPIAIJ,
1541        MatRestoreRow_MPIAIJ,
1542        MatMult_MPIAIJ,
1543        MatMultAdd_MPIAIJ,
1544        MatMultTranspose_MPIAIJ,
1545        MatMultTransposeAdd_MPIAIJ,
1546        0,
1547        0,
1548        0,
1549        0,
1550        0,
1551        0,
1552        MatRelax_MPIAIJ,
1553        MatTranspose_MPIAIJ,
1554        MatGetInfo_MPIAIJ,
1555        MatEqual_MPIAIJ,
1556        MatGetDiagonal_MPIAIJ,
1557        MatDiagonalScale_MPIAIJ,
1558        MatNorm_MPIAIJ,
1559        MatAssemblyBegin_MPIAIJ,
1560        MatAssemblyEnd_MPIAIJ,
1561        0,
1562        MatSetOption_MPIAIJ,
1563        MatZeroEntries_MPIAIJ,
1564        MatZeroRows_MPIAIJ,
1565 #if !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_SINGLE)
1566        MatLUFactorSymbolic_MPIAIJ_TFS,
1567 #else
1568        0,
1569 #endif
1570        0,
1571        0,
1572        0,
1573        MatSetUpPreallocation_MPIAIJ,
1574        0,
1575        0,
1576        0,
1577        0,
1578        MatDuplicate_MPIAIJ,
1579        0,
1580        0,
1581        0,
1582        0,
1583        MatAXPY_MPIAIJ,
1584        MatGetSubMatrices_MPIAIJ,
1585        MatIncreaseOverlap_MPIAIJ,
1586        MatGetValues_MPIAIJ,
1587        MatCopy_MPIAIJ,
1588        MatPrintHelp_MPIAIJ,
1589        MatScale_MPIAIJ,
1590        0,
1591        0,
1592        0,
1593        MatGetBlockSize_MPIAIJ,
1594        0,
1595        0,
1596        0,
1597        0,
1598        MatFDColoringCreate_MPIAIJ,
1599        0,
1600        MatSetUnfactored_MPIAIJ,
1601        0,
1602        0,
1603        MatGetSubMatrix_MPIAIJ,
1604        MatDestroy_MPIAIJ,
1605        MatView_MPIAIJ,
1606        MatGetPetscMaps_Petsc,
1607        0,
1608        0,
1609        0,
1610        0,
1611        0,
1612        0,
1613        0,
1614        0,
1615        MatSetColoring_MPIAIJ,
1616        MatSetValuesAdic_MPIAIJ,
1617        MatSetValuesAdifor_MPIAIJ
1618 };
1619 
1620 /* ----------------------------------------------------------------------------------------*/
1621 
1622 EXTERN_C_BEGIN
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1625 int MatStoreValues_MPIAIJ(Mat mat)
1626 {
1627   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1628   int        ierr;
1629 
1630   PetscFunctionBegin;
1631   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1632   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1633   PetscFunctionReturn(0);
1634 }
1635 EXTERN_C_END
1636 
1637 EXTERN_C_BEGIN
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1640 int MatRetrieveValues_MPIAIJ(Mat mat)
1641 {
1642   Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
1643   int        ierr;
1644 
1645   PetscFunctionBegin;
1646   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1647   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1648   PetscFunctionReturn(0);
1649 }
1650 EXTERN_C_END
1651 
1652 #include "petscpc.h"
1653 EXTERN_C_BEGIN
1654 EXTERN int MatGetDiagonalBlock_MPIAIJ(Mat,PetscTruth *,MatReuse,Mat *);
1655 EXTERN_C_END
1656 
1657 EXTERN_C_BEGIN
1658 #undef __FUNCT__
1659 #define __FUNCT__ "MatCreate_MPIAIJ"
1660 int MatCreate_MPIAIJ(Mat B)
1661 {
1662   Mat_MPIAIJ *b;
1663   int        ierr,i,size;
1664 
1665   PetscFunctionBegin;
1666   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1667 
1668   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
1669   B->data         = (void*)b;
1670   ierr            = PetscMemzero(b,sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
1671   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1672   B->factor       = 0;
1673   B->assembled    = PETSC_FALSE;
1674   B->mapping      = 0;
1675 
1676   B->insertmode      = NOT_SET_VALUES;
1677   b->size            = size;
1678   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1679 
1680   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
1681   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
1682 
1683   /* the information in the maps duplicates the information computed below, eventually
1684      we should remove the duplicate information that is not contained in the maps */
1685   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1686   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1687 
1688   /* build local table of row and column ownerships */
1689   ierr = PetscMalloc(2*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1690   PetscLogObjectMemory(B,2*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));
1691   b->cowners = b->rowners + b->size + 2;
1692   ierr = MPI_Allgather(&B->m,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1693   b->rowners[0] = 0;
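       /* running sum converts the gathered local row counts into ownership
          offsets: b->rowners[r] is the first global row owned by process r */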
1694   for (i=2; i<=b->size; i++) {
1695     b->rowners[i] += b->rowners[i-1];
1696   }
1697   b->rstart = b->rowners[b->rank];
1698   b->rend   = b->rowners[b->rank+1];
1699   ierr = MPI_Allgather(&B->n,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1700   b->cowners[0] = 0;
1701   for (i=2; i<=b->size; i++) {
1702     b->cowners[i] += b->cowners[i-1];
1703   }
1704   b->cstart = b->cowners[b->rank];
1705   b->cend   = b->cowners[b->rank+1];
1706 
1707   /* build cache for off array entries formed */
1708   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1709   b->donotstash  = PETSC_FALSE;
1710   b->colmap      = 0;
1711   b->garray      = 0;
1712   b->roworiented = PETSC_TRUE;
1713 
1714   /* stuff used for matrix vector multiply */
1715   b->lvec      = PETSC_NULL;
1716   b->Mvctx     = PETSC_NULL;
1717 
1718   /* stuff for MatGetRow() */
1719   b->rowindices   = 0;
1720   b->rowvalues    = 0;
1721   b->getrowactive = PETSC_FALSE;
1722 
1723   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1724                                      "MatStoreValues_MPIAIJ",
1725                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
1726   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1727                                      "MatRetrieveValues_MPIAIJ",
1728                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
1729   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1730                                      "MatGetDiagonalBlock_MPIAIJ",
1731                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
1732 
1733   PetscFunctionReturn(0);
1734 }
1735 EXTERN_C_END
1736 
1737 #undef __FUNCT__
1738 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1739 int MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1740 {
1741   Mat        mat;
1742   Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1743   int        ierr;
1744 
1745   PetscFunctionBegin;
1746   *newmat       = 0;
1747   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1748   ierr = MatSetType(mat,MATMPIAIJ);CHKERRQ(ierr);
1749   a    = (Mat_MPIAIJ*)mat->data;
1750   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1751   mat->factor       = matin->factor;
1752   mat->assembled    = PETSC_TRUE;
1753   mat->insertmode   = NOT_SET_VALUES;
1754   mat->preallocated = PETSC_TRUE;
1755 
1756   a->rstart       = oldmat->rstart;
1757   a->rend         = oldmat->rend;
1758   a->cstart       = oldmat->cstart;
1759   a->cend         = oldmat->cend;
1760   a->size         = oldmat->size;
1761   a->rank         = oldmat->rank;
1762   a->donotstash   = oldmat->donotstash;
1763   a->roworiented  = oldmat->roworiented;
1764   a->rowindices   = 0;
1765   a->rowvalues    = 0;
1766   a->getrowactive = PETSC_FALSE;
1767 
1768   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1769   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1770   if (oldmat->colmap) {
1771 #if defined (PETSC_USE_CTABLE)
1772     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1773 #else
1774     ierr = PetscMalloc((mat->N)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1775     PetscLogObjectMemory(mat,(mat->N)*sizeof(int));
1776     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(int));CHKERRQ(ierr);
1777 #endif
1778   } else a->colmap = 0;
1779   if (oldmat->garray) {
1780     int len;
1781     len  = oldmat->B->n;
1782     ierr = PetscMalloc((len+1)*sizeof(int),&a->garray);CHKERRQ(ierr);
1783     PetscLogObjectMemory(mat,len*sizeof(int));
1784     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); }
1785   } else a->garray = 0;
1786 
1787   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1788   PetscLogObjectParent(mat,a->lvec);
1789   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1790   PetscLogObjectParent(mat,a->Mvctx);
1791   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1792   PetscLogObjectParent(mat,a->A);
1793   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1794   PetscLogObjectParent(mat,a->B);
1795   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1796   *newmat = mat;
1797   PetscFunctionReturn(0);
1798 }
1799 
1800 #include "petscsys.h"
1801 
1802 EXTERN_C_BEGIN
1803 #undef __FUNCT__
1804 #define __FUNCT__ "MatLoad_MPIAIJ"
1805 int MatLoad_MPIAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1806 {
1807   Mat          A;
1808   PetscScalar  *vals,*svals;
1809   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1810   MPI_Status   status;
1811   int          i,nz,ierr,j,rstart,rend,fd;
1812   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,maxnz,*cols;
1813   int          *ourlens,*sndcounts = 0,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1814   int          tag = ((PetscObject)viewer)->tag,cend,cstart,n;
1815 
1816   PetscFunctionBegin;
1817   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1818   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1819   if (!rank) {
1820     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1821     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1822     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1823     if (header[3] < 0) {
1824       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix in special format on disk, cannot load as MPIAIJ");
1825     }
1826   }
1827 
1828   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
1829   M = header[1]; N = header[2];
1830   /* determine ownership of all rows */
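       /* each process gets M/size rows; the first M%size processes get one extra */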
1831   m = M/size + ((M % size) > rank);
1832   ierr = PetscMalloc((size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
1833   ierr = MPI_Allgather(&m,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
1834   rowners[0] = 0;
1835   for (i=2; i<=size; i++) {
1836     rowners[i] += rowners[i-1];
1837   }
1838   rstart = rowners[rank];
1839   rend   = rowners[rank+1];
1840 
1841   /* distribute row lengths to all processors */
1842   ierr    = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&ourlens);CHKERRQ(ierr);
1843   offlens = ourlens + (rend-rstart);
1844   if (!rank) {
1845     ierr = PetscMalloc(M*sizeof(int),&rowlengths);CHKERRQ(ierr);
1846     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
1847     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
1848     for (i=0; i<size; i++) sndcounts[i] = rowners[i+1] - rowners[i];
1849     ierr = MPI_Scatterv(rowlengths,sndcounts,rowners,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1850     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
1851   } else {
1852     ierr = MPI_Scatterv(0,0,0,MPI_INT,ourlens,rend-rstart,MPI_INT,0,comm);CHKERRQ(ierr);
1853   }
1854 
1855   if (!rank) {
1856     /* calculate the number of nonzeros on each processor */
1857     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
1858     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
1859     for (i=0; i<size; i++) {
1860       for (j=rowners[i]; j< rowners[i+1]; j++) {
1861         procsnz[i] += rowlengths[j];
1862       }
1863     }
1864     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1865 
1866     /* determine max buffer needed and allocate it */
1867     maxnz = 0;
1868     for (i=0; i<size; i++) {
1869       maxnz = PetscMax(maxnz,procsnz[i]);
1870     }
1871     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
1872 
1873     /* read in my part of the matrix column indices  */
1874     nz   = procsnz[0];
1875     ierr = PetscMalloc(nz*sizeof(int),&mycols);CHKERRQ(ierr);
1876     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1877 
1878     /* read in everyone else's and ship them off */
1879     for (i=1; i<size; i++) {
1880       nz   = procsnz[i];
1881       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1882       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
1883     }
1884     ierr = PetscFree(cols);CHKERRQ(ierr);
1885   } else {
1886     /* determine buffer space needed for message */
1887     nz = 0;
1888     for (i=0; i<m; i++) {
1889       nz += ourlens[i];
1890     }
1891     ierr = PetscMalloc((nz+1)*sizeof(int),&mycols);CHKERRQ(ierr);
1892 
1893     /* receive message of column indices */
1894     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
1895     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
1896     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1897   }
1898 
1899   /* determine column ownership if matrix is not square */
1900   if (N != M) {
1901     n      = N/size + ((N % size) > rank);
1902     ierr   = MPI_Scan(&n,&cend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
1903     cstart = cend - n;
1904   } else {
1905     cstart = rstart;
1906     cend   = rend;
1907     n      = cend - cstart;
1908   }
1909 
1910   /* loop over local rows, determining number of off diagonal entries */
1911   ierr = PetscMemzero(offlens,m*sizeof(int));CHKERRQ(ierr);
1912   jj = 0;
1913   for (i=0; i<m; i++) {
1914     for (j=0; j<ourlens[i]; j++) {
1915       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1916       jj++;
1917     }
1918   }
1919 
1920   /* create our matrix */
1921   for (i=0; i<m; i++) {
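       /* temporarily subtract the off-diagonal counts so that ourlens[] holds the
          diagonal-block row lengths needed for preallocation; the full lengths
          are restored immediately after the matrix is created */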
1922     ourlens[i] -= offlens[i];
1923   }
1924   ierr = MatCreateMPIAIJ(comm,m,n,M,N,0,ourlens,0,offlens,newmat);CHKERRQ(ierr);
1925   A = *newmat;
1926   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1927   for (i=0; i<m; i++) {
1928     ourlens[i] += offlens[i];
1929   }
1930 
1931   if (!rank) {
1932     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1933 
1934     /* read in my part of the matrix numerical values  */
1935     nz   = procsnz[0];
1936     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1937 
1938     /* insert into matrix */
1939     jj      = rstart;
1940     smycols = mycols;
1941     svals   = vals;
1942     for (i=0; i<m; i++) {
1943       ierr = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1944       smycols += ourlens[i];
1945       svals   += ourlens[i];
1946       jj++;
1947     }
1948 
1949     /* read in other processors and ship out */
1950     for (i=1; i<size; i++) {
1951       nz   = procsnz[i];
1952       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1953       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1954     }
1955     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1956   } else {
1957     /* receive numeric values */
1958     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1959 
1960     /* receive message of values */
1961     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1962     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1963     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1964 
1965     /* insert into matrix */
1966     jj      = rstart;
1967     smycols = mycols;
1968     svals   = vals;
1969     for (i=0; i<m; i++) {
1970       ierr     = MatSetValues(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1971       smycols += ourlens[i];
1972       svals   += ourlens[i];
1973       jj++;
1974     }
1975   }
1976   ierr = PetscFree(ourlens);CHKERRQ(ierr);
1977   ierr = PetscFree(vals);CHKERRQ(ierr);
1978   ierr = PetscFree(mycols);CHKERRQ(ierr);
1979   ierr = PetscFree(rowners);CHKERRQ(ierr);
1980 
1981   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1982   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1983   PetscFunctionReturn(0);
1984 }
1985 EXTERN_C_END
1986 
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
1989 /*
1990     Not great since it makes two copies of the submatrix: first a SeqAIJ on
1991   each process, and then the final result by concatenating the local matrices.
1992   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
1993 */
1994 int MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,int csize,MatReuse call,Mat *newmat)
1995 {
1996   int          ierr,i,m,n,rstart,row,rend,nz,*cwork,size,rank,j;
1997   int          *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1998   Mat          *local,M,Mreuse;
1999   PetscScalar  *vwork,*aa;
2000   MPI_Comm     comm = mat->comm;
2001   Mat_SeqAIJ   *aij;
2002 
2003 
2004   PetscFunctionBegin;
2005   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2006   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2007 
2008   if (call ==  MAT_REUSE_MATRIX) {
2009     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2010     if (!Mreuse) SETERRQ(1,"Submatrix passed in was not used before, cannot reuse");
2011     local = &Mreuse;
2012     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2013   } else {
2014     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2015     Mreuse = *local;
2016     ierr   = PetscFree(local);CHKERRQ(ierr);
2017   }
2018 
2019   /*
2020       m - number of local rows
2021       n - number of columns (same on all processors)
2022       rstart - first row in new global matrix generated
2023   */
2024   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2025   if (call == MAT_INITIAL_MATRIX) {
2026     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2027     if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2028     ii  = aij->i;
2029     jj  = aij->j;
2030 
2031     /*
2032         Determine the number of non-zeros in the diagonal and off-diagonal
2033         portions of the matrix in order to do correct preallocation
2034     */
2035 
2036     /* first get start and end of "diagonal" columns */
2037     if (csize == PETSC_DECIDE) {
2038       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2039       if (mglobal == n) { /* square matrix */
2040         nlocal = m;
2041       } else {
2042         nlocal = n/size + ((n % size) > rank);
2043       }
2044     } else {
2045       nlocal = csize;
2046     }
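         /* the prefix sum of the local column counts gives rend, so this process
            owns the "diagonal" column range [rend-nlocal,rend) */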
2047     ierr   = MPI_Scan(&nlocal,&rend,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2048     rstart = rend - nlocal;
2049     if (rank == size - 1 && rend != n) {
2050       SETERRQ(1,"Local column sizes do not add up to total number of columns");
2051     }
2052 
2053     /* next, compute all the lengths */
2054     ierr  = PetscMalloc((2*m+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2055     olens = dlens + m;
2056     for (i=0; i<m; i++) {
2057       jend = ii[i+1] - ii[i];
2058       olen = 0;
2059       dlen = 0;
2060       for (j=0; j<jend; j++) {
2061         if (*jj < rstart || *jj >= rend) olen++;
2062         else dlen++;
2063         jj++;
2064       }
2065       olens[i] = olen;
2066       dlens[i] = dlen;
2067     }
2068     ierr = MatCreateMPIAIJ(comm,m,nlocal,PETSC_DECIDE,n,0,dlens,0,olens,&M);CHKERRQ(ierr);
2069     ierr = PetscFree(dlens);CHKERRQ(ierr);
2070   } else {
2071     int ml,nl;
2072 
2073     M = *newmat;
2074     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2075     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2076     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2077     /*
2078          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2079        rather than the slower MatSetValues().
2080     */
2081     M->was_assembled = PETSC_TRUE;
2082     M->assembled     = PETSC_FALSE;
2083   }
2084   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2085   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2086   if (aij->indexshift) SETERRQ(PETSC_ERR_SUP,"No support for index shifted matrix");
2087   ii  = aij->i;
2088   jj  = aij->j;
2089   aa  = aij->a;
2090   for (i=0; i<m; i++) {
2091     row   = rstart + i;
2092     nz    = ii[i+1] - ii[i];
2093     cwork = jj;     jj += nz;
2094     vwork = aa;     aa += nz;
2095     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2096   }
2097 
2098   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2099   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2100   *newmat = M;
2101 
2102   /* save submatrix used in processor for next request */
2103   if (call ==  MAT_INITIAL_MATRIX) {
2104     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2105     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2106   }
2107 
2108   PetscFunctionReturn(0);
2109 }
2110 
2111 #undef __FUNCT__
2112 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2113 /*@C
2114    MatMPIAIJSetPreallocation - Preallocates storage for a sparse parallel matrix
2115    in AIJ format (the default parallel PETSc format).  For good matrix assembly
2116    performance the user should preallocate the matrix storage by setting the
2117    parameters d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters
2118    accurately, performance can be increased by more than a factor of 50.
2119 
2120    Collective on MPI_Comm
2121 
2122    Input Parameters:
2123 +  B - the matrix
2124 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2125            (same value is used for all local rows)
2126 .  d_nnz - array containing the number of nonzeros in the various rows of the
2127            DIAGONAL portion of the local submatrix (possibly different for each row)
2128            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2129            The size of this array is equal to the number of local rows, i.e. 'm'.
2130            You must leave room for the diagonal entry even if it is zero.
2131 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2132            submatrix (same value is used for all local rows).
2133 -  o_nnz - array containing the number of nonzeros in the various rows of the
2134            OFF-DIAGONAL portion of the local submatrix (possibly different for
2135            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2136            structure. The size of this array is equal to the number
2137            of local rows, i.e. 'm'.
2138 
2139    The AIJ format (also called the Yale sparse matrix format or
2140    compressed row storage), is fully compatible with standard Fortran 77
2141    storage.  That is, the stored row and column indices can begin at
2142    either one (as in Fortran) or zero.  See the users manual for details.
2143 
2144    The user MUST specify either the local or global matrix dimensions
2145    (possibly both).
2146 
2147    The parallel matrix is partitioned such that the first m0 rows belong to
2148    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2149    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2150 
2151    The DIAGONAL portion of the local submatrix of a processor can be defined
2152    as the submatrix which is obtained by extracting the part corresponding
2153    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2154    first row that belongs to the processor, and r2 is the last row belonging
2155    to this processor. This is a square mxm matrix. The remaining portion
2156    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2157 
2158    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2159 
2160    By default, this format uses inodes (identical nodes) when possible.
2161    We search for consecutive rows with the same nonzero structure, thereby
2162    reusing matrix information to achieve increased efficiency.
2163 
2164    Options Database Keys:
2165 +  -mat_aij_no_inode  - Do not use inodes
2166 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2167 -  -mat_aij_oneindex - Internally use indexing starting at 1
2168         rather than 0.  Note that when calling MatSetValues(),
2169         the user still MUST index entries starting at 0!
2170 
2171    Example usage:
2172 
2173    Consider the following 8x8 matrix with 34 non-zero values, that is
2174    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2175    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2176    as follows:
2177 
2178 .vb
2179             1  2  0  |  0  3  0  |  0  4
2180     Proc0   0  5  6  |  7  0  0  |  8  0
2181             9  0 10  | 11  0  0  | 12  0
2182     -------------------------------------
2183            13  0 14  | 15 16 17  |  0  0
2184     Proc1   0 18  0  | 19 20 21  |  0  0
2185             0  0  0  | 22 23  0  | 24  0
2186     -------------------------------------
2187     Proc2  25 26 27  |  0  0 28  | 29  0
2188            30  0  0  | 31 32 33  |  0 34
2189 .ve
2190 
2191    This can be represented as a collection of submatrices as:
2192 
2193 .vb
2194       A B C
2195       D E F
2196       G H I
2197 .ve
2198 
2199    Where the submatrices A,B,C are owned by proc0, D,E,F are
2200    owned by proc1, G,H,I are owned by proc2.
2201 
2202    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2203    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2204    The 'M','N' parameters are 8,8, and have the same values on all procs.
2205 
2206    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2207    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2208    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2209    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2210    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2211    matrix, and [DF] as another SeqAIJ matrix.
2212 
2213    When d_nz, o_nz parameters are specified, d_nz storage elements are
2214    allocated for every row of the local diagonal submatrix, and o_nz
2215    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2216    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2217    over all local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2218    In this case, the values of d_nz,o_nz are:
2219 .vb
2220      proc0 : d_nz = 2, o_nz = 2
2221      proc1 : d_nz = 3, o_nz = 2
2222      proc2 : d_nz = 1, o_nz = 4
2223 .ve
2224    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2225    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2226    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2227    34 values.
2228 
2229    When d_nnz, o_nnz parameters are specified, the storage is specified
2230    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2231    In the above case the values for d_nnz,o_nnz are:
2232 .vb
2233      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2234      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2235      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2236 .ve
2237    Here the space allocated is the sum of all the above values, i.e. 34, and
2238    hence pre-allocation is perfect.
2239 
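        A minimal calling sketch (illustrative only: comm is assumed to be a
        three-process MPI communicator, and the d_nnz/o_nnz values shown are the
        proc0 values from the example above; proc1 and proc2 pass their own):

     .vb
          Mat A;
          int ierr,d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
          ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
          ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
          ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     .ve
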
2240    Level: intermediate
2241 
2242 .keywords: matrix, aij, compressed row, sparse, parallel
2243 
2244 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2245 @*/
2246 int MatMPIAIJSetPreallocation(Mat B,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2247 {
2248   Mat_MPIAIJ   *b;
2249   int          ierr,i;
2250   PetscTruth   flg2;
2251 
2252   PetscFunctionBegin;
2253   ierr = PetscTypeCompare((PetscObject)B,MATMPIAIJ,&flg2);CHKERRQ(ierr);
2254   if (!flg2) PetscFunctionReturn(0);
2255   B->preallocated = PETSC_TRUE;
2256   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2257   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2258   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2259   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2260   if (d_nnz) {
2261     for (i=0; i<B->m; i++) {
2262       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2263     }
2264   }
2265   if (o_nnz) {
2266     for (i=0; i<B->m; i++) {
2267       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2268     }
2269   }
2270   b = (Mat_MPIAIJ*)B->data;
2271 
2272   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2273   PetscLogObjectParent(B,b->A);
2274   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2275   PetscLogObjectParent(B,b->B);
2276 
2277   PetscFunctionReturn(0);
2278 }
2279 
2280 #undef __FUNCT__
2281 #define __FUNCT__ "MatCreateMPIAIJ"
2282 /*@C
2283    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2284    (the default parallel PETSc format).  For good matrix assembly performance
2285    the user should preallocate the matrix storage by setting the parameters
2286    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2287    performance can be increased by more than a factor of 50.
2288 
2289    Collective on MPI_Comm
2290 
2291    Input Parameters:
2292 +  comm - MPI communicator
2293 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2294            This value should be the same as the local size used in creating the
2295            y vector for the matrix-vector product y = Ax.
2296 .  n - This value should be the same as the local size used in creating the
2297        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2298        calculated if N is given) For square matrices n is almost always m.
2299 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2300 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2301 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2302            (same value is used for all local rows)
2303 .  d_nnz - array containing the number of nonzeros in the various rows of the
2304            DIAGONAL portion of the local submatrix (possibly different for each row)
2305            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2306            The size of this array is equal to the number of local rows, i.e. 'm'.
2307            You must leave room for the diagonal entry even if it is zero.
2308 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2309            submatrix (same value is used for all local rows).
2310 -  o_nnz - array containing the number of nonzeros in the various rows of the
2311            OFF-DIAGONAL portion of the local submatrix (possibly different for
2312            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2313            structure. The size of this array is equal to the number
2314            of local rows, i.e. 'm'.
2315 
2316    Output Parameter:
2317 .  A - the matrix
2318 
2319    Notes:
2320    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2321    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2322    storage requirements for this matrix.
2323 
2324    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2325    processor then it must be used on all processors that share the object for
2326    that argument.
2327 
2328    The AIJ format (also called the Yale sparse matrix format or
2329    compressed row storage), is fully compatible with standard Fortran 77
2330    storage.  That is, the stored row and column indices can begin at
2331    either one (as in Fortran) or zero.  See the users manual for details.
2332 
2333    The user MUST specify either the local or global matrix dimensions
2334    (possibly both).
2335 
2336    The parallel matrix is partitioned such that the first m0 rows belong to
2337    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2338    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2339 
2340    The DIAGONAL portion of the local submatrix of a processor can be defined
2341    as the submatrix which is obtained by extracting the part corresponding
2342    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2343    first row that belongs to the processor, and r2 is the last row belonging
2344    to this processor. This is a square mxm matrix. The remaining portion
2345    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2346 
2347    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2348 
2349    By default, this format uses inodes (identical nodes) when possible.
2350    We search for consecutive rows with the same nonzero structure, thereby
2351    reusing matrix information to achieve increased efficiency.
2352 
2353    Options Database Keys:
2354 +  -mat_aij_no_inode  - Do not use inodes
2355 .  -mat_aij_inode_limit <limit> - Sets inode limit (max limit=5)
2356 -  -mat_aij_oneindex - Internally use indexing starting at 1
2357         rather than 0.  Note that when calling MatSetValues(),
2358         the user still MUST index entries starting at 0!
2359 
2360 
2361    Example usage:
2362 
2363    Consider the following 8x8 matrix with 34 non-zero values, that is
2364    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2365    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2366    as follows:
2367 
2368 .vb
2369             1  2  0  |  0  3  0  |  0  4
2370     Proc0   0  5  6  |  7  0  0  |  8  0
2371             9  0 10  | 11  0  0  | 12  0
2372     -------------------------------------
2373            13  0 14  | 15 16 17  |  0  0
2374     Proc1   0 18  0  | 19 20 21  |  0  0
2375             0  0  0  | 22 23  0  | 24  0
2376     -------------------------------------
2377     Proc2  25 26 27  |  0  0 28  | 29  0
2378            30  0  0  | 31 32 33  |  0 34
2379 .ve
2380 
2381    This can be represented as a collection of submatrices as:
2382 
2383 .vb
2384       A B C
2385       D E F
2386       G H I
2387 .ve
2388 
2389    Where the submatrices A,B,C are owned by proc0, D,E,F are
2390    owned by proc1, G,H,I are owned by proc2.
2391 
2392    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2393    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2394    The 'M','N' parameters are 8,8, and have the same values on all procs.
2395 
2396    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2397    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2398    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2399    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2400    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2401    matrix, and [DF] as another SeqAIJ matrix.
2402 
2403    When d_nz, o_nz parameters are specified, d_nz storage elements are
2404    allocated for every row of the local diagonal submatrix, and o_nz
2405    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2406    One way to choose d_nz and o_nz is to use the maximum number of nonzeros
2407    over all local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2408    In this case, the values of d_nz,o_nz are:
2409 .vb
2410      proc0 : d_nz = 2, o_nz = 2
2411      proc1 : d_nz = 3, o_nz = 2
2412      proc2 : d_nz = 1, o_nz = 4
2413 .ve
2414    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2415    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2416    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2417    34 values.
2418 
2419    When d_nnz, o_nnz parameters are specified, the storage is specified
2420    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2421    In the above case the values for d_nnz,o_nnz are:
2422 .vb
2423      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2424      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2425      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2426 .ve
2427    Here the space allocated is the sum of all the above values, i.e. 34, and
2428    hence pre-allocation is perfect.
2429 
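        A minimal calling sketch (illustrative only; the d_nnz/o_nnz values shown
        are the proc0 values from the example above, and each process passes its
        own local sizes and arrays):

     .vb
          Mat A;
          int ierr,d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};
          ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
     .ve
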
2430    Level: intermediate
2431 
2432 .keywords: matrix, aij, compressed row, sparse, parallel
2433 
2434 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues()
2435 @*/
2436 int MatCreateMPIAIJ(MPI_Comm comm,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2437 {
2438   int ierr,size;
2439 
2440   PetscFunctionBegin;
2441   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2442   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2443   if (size > 1) {
2444     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2445     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2446   } else {
2447     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2448     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2449   }
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2455 int MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2456 {
2457   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2458   PetscFunctionBegin;
2459   *Ad     = a->A;
2460   *Ao     = a->B;
2461   *colmap = a->garray;
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2467 int MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2468 {
2469   int        ierr;
2470   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2471 
2472   PetscFunctionBegin;
2473   if (coloring->ctype == IS_COLORING_LOCAL) {
2474     int        *allcolors,*colors,i;
2475     ISColoring ocoloring;
2476 
2477     /* set coloring for diagonal portion */
2478     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2479 
2480     /* set coloring for off-diagonal portion */
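         /* every process gathers the colors of all global columns, then selects
            the ones matching its off-diagonal columns through garray */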
2481     ierr = ISAllGatherIndices(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2482     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2483     for (i=0; i<a->B->n; i++) {
2484       colors[i] = allcolors[a->garray[i]];
2485     }
2486     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2487     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2488     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2489     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2490   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2491     int        *colors,i,*larray;
2492     ISColoring ocoloring;
2493 
2494     /* set coloring for diagonal portion */
2495     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2496     for (i=0; i<a->A->n; i++) {
2497       larray[i] = i + a->cstart;
2498     }
2499     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2500     ierr = PetscMalloc((a->A->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2501     for (i=0; i<a->A->n; i++) {
2502       colors[i] = coloring->colors[larray[i]];
2503     }
2504     ierr = PetscFree(larray);CHKERRQ(ierr);
2505     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2506     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2507     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2508 
2509     /* set coloring for off-diagonal portion */
2510     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&larray);CHKERRQ(ierr);
2511     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2512     ierr = PetscMalloc((a->B->n+1)*sizeof(int),&colors);CHKERRQ(ierr);
2513     for (i=0; i<a->B->n; i++) {
2514       colors[i] = coloring->colors[larray[i]];
2515     }
2516     ierr = PetscFree(larray);CHKERRQ(ierr);
2517     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2518     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2519     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2520   } else {
2521     SETERRQ1(1,"No support ISColoringType %d",coloring->ctype);
2522   }
2523 
2524   PetscFunctionReturn(0);
2525 }
2526 
2527 #undef __FUNCT__
2528 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2529 int MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2530 {
2531   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2532   int        ierr;
2533 
2534   PetscFunctionBegin;
2535   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2536   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2542 int MatSetValuesAdifor_MPIAIJ(Mat A,int nl,void *advalues)
2543 {
2544   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2545   int        ierr;
2546 
2547   PetscFunctionBegin;
2548   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2549   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatMerge"
2555 /*@C
2556       MatMerge - Creates a single large PETSc matrix by concatenating the
2557                  sequential matrices from each processor
2558 
2559     Collective on MPI_Comm
2560 
2561    Input Parameters:
2562 +    comm - the communicator the parallel matrix will live on
2563 -    inmat - the input sequential matrix (one per process)
2564 
2565    Output Parameter:
2566 .    outmat - the parallel matrix generated
2567 
2568     Level: advanced
2569 
2570    Notes: The number of columns of the input matrix MUST be the same on EACH
2571       process. The input matrix is destroyed during the merge.
2572 
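        A typical calling sketch (a sketch only; Aseq stands for the sequential
        matrix each process is assumed to have assembled beforehand):

     .vb
          Mat Aseq,Apar;
          /* ... each process builds and assembles its own Aseq ... */
          ierr = MatMerge(PETSC_COMM_WORLD,Aseq,&Apar);CHKERRQ(ierr);
          /* Aseq is destroyed by the call */
     .ve
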
2573 @*/
2574 int MatMerge(MPI_Comm comm,Mat inmat, Mat *outmat)
2575 {
2576   int         ierr,m,n,i,rstart,*indx,nnz,I,*dnz,*onz;
2577   PetscScalar *values;
2578   PetscMap    columnmap,rowmap;
2579 
2580   PetscFunctionBegin;
2581 
2582   ierr = MatGetSize(inmat,&m,&n);CHKERRQ(ierr);
2583 
2584   /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2585   ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2586   ierr = PetscMapSetSize(columnmap,n);CHKERRQ(ierr);
2587   ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2588   ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2589   ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2590 
2591   ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2592   ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2593   ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2594   ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2595   ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2596 
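       /* two passes over the rows of inmat: first count how many nonzeros fall in
          the diagonal and off-diagonal blocks for preallocation, then insert the
          actual values */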
2597   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2598   for (i=0;i<m;i++) {
2599     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2600     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2601     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2602   }
2603   ierr = MatCreateMPIAIJ(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,dnz,0,onz,outmat);CHKERRQ(ierr);
2604   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2605 
2606   for (i=0;i<m;i++) {
2607     ierr = MatGetRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2608     I    = i + rstart;
2609     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2610     ierr = MatRestoreRow(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2611   }
2612   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2613   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2614   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2615 
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 #undef __FUNCT__
2620 #define __FUNCT__ "MatFileSplit"
2621 int MatFileSplit(Mat A,char *outfile)
2622 {
2623   int         ierr,rank,len,m,N,i,rstart,*indx,nnz;
2624   PetscViewer out;
2625   char        *name;
2626   Mat         B;
2627   PetscScalar *values;
2628 
2629   PetscFunctionBegin;
2630 
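       /* copy this process's rows into a sequential matrix and write it to the
          file <outfile>.<rank> */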
2631   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2632   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2633   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,m,N,0,0,&B);CHKERRQ(ierr);
2634   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2635   for (i=0;i<m;i++) {
2636     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2637     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2638     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2639   }
2640   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2641   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2642 
2643   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2644   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2645   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for ".", the rank digits, and the terminator */
2646   sprintf(name,"%s.%d",outfile,rank);
2647   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_BINARY_CREATE,&out);CHKERRQ(ierr);
2648   ierr = PetscFree(name);CHKERRQ(ierr);
2649   ierr = MatView(B,out);CHKERRQ(ierr);
2650   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2651   ierr = MatDestroy(B);CHKERRQ(ierr);
2652   PetscFunctionReturn(0);
2653 }
2654